From 86972f111497bd15df8da181d0d7ac68b866320b Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Tue, 26 Oct 2021 14:53:53 +0000 Subject: [PATCH] [AArch64][SVE] Use TargetFrameIndex in more SVE load/store addressing modes Add support for generating TargetFrameIndex in complex patterns for indexed addressing modes in SVE. Additionally, add missing load/stores to getMemOpInfo and getLoadStoreImmIdx. Differential Revision: https://reviews.llvm.org/D112617 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 13 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 85 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 - .../AArch64/named-vector-shuffles-sve.ll | 260 +- .../AArch64/sve-calling-convention-mixed.ll | 33 +- .../AArch64/sve-fixed-length-fp-vselect.ll | 1698 ++++---- .../AArch64/sve-fixed-length-int-vselect.ll | 3646 ++++++++--------- .../sve-fixed-length-vector-shuffle.ll | 3 +- .../CodeGen/AArch64/sve-insert-element.ll | 14 +- .../test/CodeGen/AArch64/sve-insert-vector.ll | 44 +- llvm/test/CodeGen/AArch64/sve-ldnf1.mir | 277 ++ llvm/test/CodeGen/AArch64/sve-ldstnt1.mir | 203 + .../CodeGen/AArch64/sve-split-extract-elt.ll | 82 +- .../CodeGen/AArch64/sve-split-insert-elt.ll | 52 +- 14 files changed, 3462 insertions(+), 2956 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-ldnf1.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ldstnt1.mir diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7fe1ecaa68bb..fe9b2f8883b9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5050,6 +5050,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, SDValue &OffImm) { const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); + const DataLayout &DL = CurDAG->getDataLayout(); + + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = 
CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); + return true; + } if (MemVT == EVT()) return false; @@ -5073,6 +5081,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, return false; Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + } + OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 6f5e32c5ced5..c79c19b2fbeb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2263,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STNPSi: case AArch64::LDG: case AArch64::STGPi: + case AArch64::LD1B_IMM: - case AArch64::LD1H_IMM: - case AArch64::LD1W_IMM: - case AArch64::LD1D_IMM: - case AArch64::ST1B_IMM: - case AArch64::ST1H_IMM: - case AArch64::ST1W_IMM: - case AArch64::ST1D_IMM: case AArch64::LD1B_H_IMM: + case AArch64::LD1B_S_IMM: + case AArch64::LD1B_D_IMM: case AArch64::LD1SB_H_IMM: + case AArch64::LD1SB_S_IMM: + case AArch64::LD1SB_D_IMM: + case AArch64::LD1H_IMM: case AArch64::LD1H_S_IMM: + case AArch64::LD1H_D_IMM: case AArch64::LD1SH_S_IMM: + case AArch64::LD1SH_D_IMM: + case AArch64::LD1W_IMM: case AArch64::LD1W_D_IMM: case AArch64::LD1SW_D_IMM: + case AArch64::LD1D_IMM: + + case AArch64::ST1B_IMM: case AArch64::ST1B_H_IMM: - case AArch64::ST1H_S_IMM: - case AArch64::ST1W_D_IMM: - case AArch64::LD1B_S_IMM: - case AArch64::LD1SB_S_IMM: - case AArch64::LD1H_D_IMM: - case AArch64::LD1SH_D_IMM: case AArch64::ST1B_S_IMM: - case AArch64::ST1H_D_IMM: - case AArch64::LD1B_D_IMM: - case AArch64::LD1SB_D_IMM: case AArch64::ST1B_D_IMM: + case AArch64::ST1H_IMM: + case AArch64::ST1H_S_IMM: + case AArch64::ST1H_D_IMM: + case 
AArch64::ST1W_IMM: + case AArch64::ST1W_D_IMM: + case AArch64::ST1D_IMM: + case AArch64::LD1RB_IMM: case AArch64::LD1RB_H_IMM: case AArch64::LD1RB_S_IMM: @@ -2305,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::LD1RW_D_IMM: case AArch64::LD1RSW_IMM: case AArch64::LD1RD_IMM: + + case AArch64::LDNT1B_ZRI: + case AArch64::LDNT1H_ZRI: + case AArch64::LDNT1W_ZRI: + case AArch64::LDNT1D_ZRI: + case AArch64::STNT1B_ZRI: + case AArch64::STNT1H_ZRI: + case AArch64::STNT1W_ZRI: + case AArch64::STNT1D_ZRI: + + case AArch64::LDNF1B_IMM: + case AArch64::LDNF1B_H_IMM: + case AArch64::LDNF1B_S_IMM: + case AArch64::LDNF1B_D_IMM: + case AArch64::LDNF1SB_H_IMM: + case AArch64::LDNF1SB_S_IMM: + case AArch64::LDNF1SB_D_IMM: + case AArch64::LDNF1H_IMM: + case AArch64::LDNF1H_S_IMM: + case AArch64::LDNF1H_D_IMM: + case AArch64::LDNF1SH_S_IMM: + case AArch64::LDNF1SH_D_IMM: + case AArch64::LDNF1W_IMM: + case AArch64::LDNF1W_D_IMM: + case AArch64::LDNF1SW_D_IMM: + case AArch64::LDNF1D_IMM: return 3; case AArch64::ADDG: case AArch64::STGOffset: @@ -2855,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::LD1H_IMM: case AArch64::LD1W_IMM: case AArch64::LD1D_IMM: + case AArch64::LDNT1B_ZRI: + case AArch64::LDNT1H_ZRI: + case AArch64::LDNT1W_ZRI: + case AArch64::LDNT1D_ZRI: case AArch64::ST1B_IMM: case AArch64::ST1H_IMM: case AArch64::ST1W_IMM: case AArch64::ST1D_IMM: + case AArch64::STNT1B_ZRI: + case AArch64::STNT1H_ZRI: + case AArch64::STNT1W_ZRI: + case AArch64::STNT1D_ZRI: + case AArch64::LDNF1B_IMM: + case AArch64::LDNF1H_IMM: + case AArch64::LDNF1W_IMM: + case AArch64::LDNF1D_IMM: // A full vectors worth of data // Width = mbytes * elements Scale = TypeSize::Scalable(16); @@ -2875,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::ST1B_H_IMM: case AArch64::ST1H_S_IMM: case AArch64::ST1W_D_IMM: + case AArch64::LDNF1B_H_IMM: + case 
AArch64::LDNF1SB_H_IMM: + case AArch64::LDNF1H_S_IMM: + case AArch64::LDNF1SH_S_IMM: + case AArch64::LDNF1W_D_IMM: + case AArch64::LDNF1SW_D_IMM: // A half vector worth of data // Width = mbytes * elements Scale = TypeSize::Scalable(8); @@ -2888,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::LD1SH_D_IMM: case AArch64::ST1B_S_IMM: case AArch64::ST1H_D_IMM: + case AArch64::LDNF1B_S_IMM: + case AArch64::LDNF1SB_S_IMM: + case AArch64::LDNF1H_D_IMM: + case AArch64::LDNF1SH_D_IMM: // A quarter vector worth of data // Width = mbytes * elements Scale = TypeSize::Scalable(4); @@ -2898,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, case AArch64::LD1B_D_IMM: case AArch64::LD1SB_D_IMM: case AArch64::ST1B_D_IMM: + case AArch64::LDNF1B_D_IMM: + case AArch64::LDNF1SB_D_IMM: // A eighth vector worth of data // Width = mbytes * elements Scale = TypeSize::Scalable(2); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 0ac0eb2999d2..67d8fbb45cf5 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2200,10 +2200,6 @@ let Predicates = [HasSVEorStreamingSVE] in { def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)), (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; } - let AddedComplexity = 3 in { - def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)), - (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - } def : Pat<(Store (Ty ZPR:$val), GPR64:$base), (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; @@ -2240,10 +2236,6 @@ let Predicates = [HasSVEorStreamingSVE] in { def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))), (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; } - let AddedComplexity = 3 in { - def _fi : Pat<(Ty (Load (am_sve_fi 
GPR64sp:$base, simm4s1:$offset))), - (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - } def : Pat<(Ty (Load GPR64:$base)), (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>; diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll index 0077ea3b7ff2..91b2281b167e 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -40,14 +40,14 @@ define @splice_nxv16i8_clamped_idx( %a, @splice_nxv8i16_clamped_idx( %a, @splice_nxv4i32_clamped_idx( %a, @splice_nxv2i64_clamped_idx( %a, @splice_nxv2f16_neg2_idx( %a, @splice_nxv2f16_clamped_idx( %a, < ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: mov w9, #32 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -274,7 +274,7 @@ define @splice_nxv4f16_neg3_idx( %a, @splice_nxv4f16_clamped_idx( %a, < ; CHECK-NEXT: cntw x8 ; CHECK-NEXT: mov w9, #64 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] +; CHECK-NEXT: ld1b { 
z0.b }, p0/z, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -352,14 +352,14 @@ define @splice_nxv8f16_clamped_idx( %a, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov w10, #128 -; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -389,7 +389,7 @@ define @splice_nxv2f32_neg2_idx( %a, @splice_nxv2f32_clamped_idx( %a, ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: mov w9, #32 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -467,14 +467,14 @@ define @splice_nxv4f32_clamped_idx( %a, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #64 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov w10, #64 -; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -508,14 +508,14 @@ define @splice_nxv2f64_clamped_idx( % ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #32 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov w10, #32 -; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: cmp x8, #32 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -602,13 +602,13 @@ define @splice_nxv8i32_idx( %a, @splice_nxv16f32_clamped_idx( ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w10, #16 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: cmp x8, #16 -; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl] -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl] +; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl] ; CHECK-NEXT: add x10, x9, x8, lsl #2 -; CHECK-NEXT: st1w { z7.s }, p0, [x9, #7, mul vl] -; CHECK-NEXT: st1w { z4.s }, p0, [x9, #4, mul vl] -; CHECK-NEXT: st1w { z5.s }, p0, [x9, #5, mul vl] -; CHECK-NEXT: st1w { z6.s }, p0, [x9, #6, mul vl] +; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl] +; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl] @@ -662,7 +662,7 @@ define @splice_nxv16i8( %a, @splice_nxv16i8_clamped( %a, @splice_nxv8i16( %a, @splice_nxv8i16_clamped( %a, @splice_nxv4i32( %a, @splice_nxv4i32_clamped( %a, @splice_nxv2i64( %a, @splice_nxv2i64_clamped( %a, @splice_nxv8f16( %a, @splice_nxv8f16_clamped( %a, @splice_nxv4f32( %a, @splice_nxv4f32_clamped( %a, @splice_nxv2f64( %a, @splice_nxv2f64_clamped( %a, < ; CHECK-NEXT: mov w10, #24 ; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: addvl x10, x8, #1 +; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: sub x9, x10, x9 -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-NEXT: sub 
x8, x8, x9 +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1112,7 +1112,7 @@ define @splice_nxv2i8( %a, ; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 @@ -1129,16 +1129,16 @@ define @splice_nxv8i32( %a, @splice_nxv16f32_clamped( %a, ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #4 -; CHECK-NEXT: cmp x9, #68 -; CHECK-NEXT: mov w10, #68 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: rdvl x8, #4 +; CHECK-NEXT: cmp x8, #68 +; CHECK-NEXT: mov w9, #68 +; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: addvl x10, x8, #4 -; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] -; CHECK-NEXT: sub x9, x10, x9 -; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] +; CHECK-NEXT: addvl x9, x10, #4 +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: sub x8, x9, x8 +; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: st1w { z7.s }, p0, [x8, #7, mul vl] -; CHECK-NEXT: st1w { z4.s }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1w { z5.s }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1w { z6.s }, p0, [x8, #6, mul vl] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] -; CHECK-NEXT: ld1w { z2.s }, p0/z, [x9, #2, mul vl] -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x9, #3, mul vl] +; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl] +; 
CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl] +; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl] +; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x8, #2, mul vl] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #8 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index b27b7a1e3250..40af9abcc555 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -12,17 +12,16 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] -; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z16.d }, p0, [sp] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: bl callee1 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload @@ -44,24 +43,27 @@ define float @foo2(double* %x0, double* %x1) nounwind { ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: ld4d { z1.d, 
z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w3, #3 ; CHECK-NEXT: mov w4, #4 ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 -; CHECK-NEXT: mov w7, #7 +; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] ; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: mov w7, #7 +; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] +; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 @@ -83,17 +85,16 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind { ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0] ; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] -; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: fmov s1, #2.00000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z16.d }, p0, [sp] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: bl callee3 ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll index 4532ba311a8b..8f786863094a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll @@ -55,43 +55,42 @@ define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrh w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrh w8, [x2] ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: sbfx w10, w9, #15, #1 -; CHECK-NEXT: sbfx w11, w9, #14, #1 -; CHECK-NEXT: sbfx w12, w9, #13, #1 -; CHECK-NEXT: strh w10, [sp, #30] -; CHECK-NEXT: sbfx w10, w9, #12, #1 -; CHECK-NEXT: strh w11, [sp, #28] -; CHECK-NEXT: sbfx w11, w9, #11, #1 -; CHECK-NEXT: strh w12, [sp, #26] -; CHECK-NEXT: sbfx w12, w9, #10, #1 -; CHECK-NEXT: strh w10, [sp, #24] -; CHECK-NEXT: sbfx w10, w9, #9, #1 -; CHECK-NEXT: strh w11, [sp, #22] -; CHECK-NEXT: sbfx w11, w9, #8, #1 -; CHECK-NEXT: strh w12, [sp, #20] -; CHECK-NEXT: sbfx w12, w9, #7, #1 -; CHECK-NEXT: strh w10, [sp, #18] 
-; CHECK-NEXT: sbfx w10, w9, #6, #1 -; CHECK-NEXT: strh w11, [sp, #16] -; CHECK-NEXT: sbfx w11, w9, #5, #1 -; CHECK-NEXT: strh w12, [sp, #14] -; CHECK-NEXT: sbfx w12, w9, #4, #1 -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: sbfx w10, w9, #3, #1 -; CHECK-NEXT: strh w11, [sp, #10] -; CHECK-NEXT: sbfx w11, w9, #2, #1 -; CHECK-NEXT: strh w12, [sp, #8] -; CHECK-NEXT: sbfx w12, w9, #1, #1 -; CHECK-NEXT: sbfx w9, w9, #0, #1 -; CHECK-NEXT: strh w10, [sp, #6] -; CHECK-NEXT: strh w11, [sp, #4] -; CHECK-NEXT: strh w12, [sp, #2] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] +; CHECK-NEXT: sbfx w9, w8, #15, #1 +; CHECK-NEXT: sbfx w10, w8, #14, #1 +; CHECK-NEXT: sbfx w11, w8, #13, #1 +; CHECK-NEXT: strh w9, [sp, #30] +; CHECK-NEXT: sbfx w9, w8, #12, #1 +; CHECK-NEXT: strh w10, [sp, #28] +; CHECK-NEXT: sbfx w10, w8, #11, #1 +; CHECK-NEXT: strh w11, [sp, #26] +; CHECK-NEXT: sbfx w11, w8, #10, #1 +; CHECK-NEXT: strh w9, [sp, #24] +; CHECK-NEXT: sbfx w9, w8, #9, #1 +; CHECK-NEXT: strh w10, [sp, #22] +; CHECK-NEXT: sbfx w10, w8, #8, #1 +; CHECK-NEXT: strh w11, [sp, #20] +; CHECK-NEXT: sbfx w11, w8, #7, #1 +; CHECK-NEXT: strh w9, [sp, #18] +; CHECK-NEXT: sbfx w9, w8, #6, #1 +; CHECK-NEXT: strh w10, [sp, #16] +; CHECK-NEXT: sbfx w10, w8, #5, #1 +; CHECK-NEXT: strh w11, [sp, #14] +; CHECK-NEXT: sbfx w11, w8, #4, #1 +; CHECK-NEXT: strh w9, [sp, #12] +; CHECK-NEXT: sbfx w9, w8, #3, #1 +; CHECK-NEXT: strh w10, [sp, #10] +; CHECK-NEXT: sbfx w10, w8, #2, #1 +; CHECK-NEXT: strh w11, [sp, #8] +; CHECK-NEXT: sbfx w11, w8, #1, #1 +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: strh w9, [sp, #6] +; CHECK-NEXT: strh w10, [sp, #4] +; CHECK-NEXT: strh w11, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] ; CHECK-NEXT: and z0.h, z0.h, #0x1 @@ -119,75 +118,74 @@ define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: 
.cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldr w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldr w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 ; VBITS_GE_512-NEXT: ptrue p1.h -; VBITS_GE_512-NEXT: asr w10, w9, #31 -; VBITS_GE_512-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_512-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #62] -; VBITS_GE_512-NEXT: sbfx w10, w9, #28, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #60] -; VBITS_GE_512-NEXT: sbfx w11, w9, #27, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #58] -; VBITS_GE_512-NEXT: sbfx w12, w9, #26, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #56] -; VBITS_GE_512-NEXT: sbfx w10, w9, #25, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #54] -; VBITS_GE_512-NEXT: sbfx w11, w9, #24, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #52] -; VBITS_GE_512-NEXT: sbfx w12, w9, #23, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #50] -; VBITS_GE_512-NEXT: sbfx w10, w9, #22, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #48] -; VBITS_GE_512-NEXT: sbfx w11, w9, #21, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #46] -; VBITS_GE_512-NEXT: sbfx w12, w9, #20, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #44] -; VBITS_GE_512-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #42] -; VBITS_GE_512-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #40] -; VBITS_GE_512-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #38] -; VBITS_GE_512-NEXT: sbfx w10, w9, #16, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #36] -; VBITS_GE_512-NEXT: sbfx w11, w9, #15, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #34] -; VBITS_GE_512-NEXT: sbfx w12, w9, #14, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #32] -; VBITS_GE_512-NEXT: sbfx w10, w9, #13, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #30] -; VBITS_GE_512-NEXT: sbfx w11, w9, #12, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #28] -; VBITS_GE_512-NEXT: sbfx w12, w9, #11, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #26] -; 
VBITS_GE_512-NEXT: sbfx w10, w9, #10, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #24] -; VBITS_GE_512-NEXT: sbfx w11, w9, #9, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #22] -; VBITS_GE_512-NEXT: sbfx w12, w9, #8, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #20] -; VBITS_GE_512-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #18] -; VBITS_GE_512-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #16] -; VBITS_GE_512-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #14] -; VBITS_GE_512-NEXT: sbfx w10, w9, #4, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #12] -; VBITS_GE_512-NEXT: sbfx w11, w9, #3, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #10] -; VBITS_GE_512-NEXT: sbfx w12, w9, #2, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #8] -; VBITS_GE_512-NEXT: sbfx w10, w9, #1, #1 -; VBITS_GE_512-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #6] -; VBITS_GE_512-NEXT: strh w12, [sp, #4] -; VBITS_GE_512-NEXT: strh w10, [sp, #2] -; VBITS_GE_512-NEXT: strh w9, [sp] -; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_512-NEXT: asr w9, w8, #31 +; VBITS_GE_512-NEXT: sbfx w10, w8, #30, #1 +; VBITS_GE_512-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #62] +; VBITS_GE_512-NEXT: sbfx w9, w8, #28, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #60] +; VBITS_GE_512-NEXT: sbfx w10, w8, #27, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #58] +; VBITS_GE_512-NEXT: sbfx w11, w8, #26, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #56] +; VBITS_GE_512-NEXT: sbfx w9, w8, #25, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #54] +; VBITS_GE_512-NEXT: sbfx w10, w8, #24, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #52] +; VBITS_GE_512-NEXT: sbfx w11, w8, #23, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #50] +; VBITS_GE_512-NEXT: sbfx w9, w8, #22, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #48] +; VBITS_GE_512-NEXT: sbfx w10, w8, #21, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #46] +; VBITS_GE_512-NEXT: sbfx w11, w8, #20, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #44] +; 
VBITS_GE_512-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #42] +; VBITS_GE_512-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #40] +; VBITS_GE_512-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #38] +; VBITS_GE_512-NEXT: sbfx w9, w8, #16, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #36] +; VBITS_GE_512-NEXT: sbfx w10, w8, #15, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #34] +; VBITS_GE_512-NEXT: sbfx w11, w8, #14, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #32] +; VBITS_GE_512-NEXT: sbfx w9, w8, #13, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #30] +; VBITS_GE_512-NEXT: sbfx w10, w8, #12, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #28] +; VBITS_GE_512-NEXT: sbfx w11, w8, #11, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #26] +; VBITS_GE_512-NEXT: sbfx w9, w8, #10, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #24] +; VBITS_GE_512-NEXT: sbfx w10, w8, #9, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #22] +; VBITS_GE_512-NEXT: sbfx w11, w8, #8, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #20] +; VBITS_GE_512-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #18] +; VBITS_GE_512-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #16] +; VBITS_GE_512-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #14] +; VBITS_GE_512-NEXT: sbfx w9, w8, #4, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #12] +; VBITS_GE_512-NEXT: sbfx w10, w8, #3, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #10] +; VBITS_GE_512-NEXT: sbfx w11, w8, #2, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #8] +; VBITS_GE_512-NEXT: sbfx w9, w8, #1, #1 +; VBITS_GE_512-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #6] +; VBITS_GE_512-NEXT: strh w11, [sp, #4] +; VBITS_GE_512-NEXT: strh w9, [sp, #2] +; VBITS_GE_512-NEXT: strh w8, [sp] +; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1 @@ -215,139 +213,138 @@ define void 
@select_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i1>* %c) #0 { ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldr x9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldr x8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.h, vl64 ; VBITS_GE_1024-NEXT: ptrue p1.h -; VBITS_GE_1024-NEXT: asr x10, x9, #63 -; VBITS_GE_1024-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #126] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #124] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #122] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #120] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #118] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #116] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #114] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #112] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #110] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #108] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #106] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #104] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #102] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #100] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #98] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #96] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #94] -; VBITS_GE_1024-NEXT: sbfx 
x11, x9, #44, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #92] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #90] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #88] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #86] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #84] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #82] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #80] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #78] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #76] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #74] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #72] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #70] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #68] -; VBITS_GE_1024-NEXT: asr w12, w9, #31 -; VBITS_GE_1024-NEXT: strh w10, [sp, #66] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #64] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #62] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #60] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #58] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #56] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #54] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #52] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #50] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #48] -; 
VBITS_GE_1024-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #46] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #44] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #42] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #40] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #38] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #36] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #34] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #30] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #28] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #26] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #24] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #22] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #20] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #18] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #16] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #14] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #12] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #10] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #8] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #6] -; VBITS_GE_1024-NEXT: strh w11, [sp, #4] -; VBITS_GE_1024-NEXT: strh w12, [sp, #2] -; VBITS_GE_1024-NEXT: strh w9, [sp] 
-; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_1024-NEXT: asr x9, x8, #63 +; VBITS_GE_1024-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #126] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #124] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #122] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #120] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #118] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #116] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #114] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #112] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #110] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #108] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #106] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #104] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #102] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #100] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #98] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #96] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #94] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #92] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #90] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #88] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #86] +; 
VBITS_GE_1024-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #84] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #82] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #80] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #78] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #76] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #74] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #72] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #70] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #68] +; VBITS_GE_1024-NEXT: asr w11, w8, #31 +; VBITS_GE_1024-NEXT: strh w9, [sp, #66] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #64] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #62] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #60] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #58] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #56] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #54] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #52] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #50] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #48] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #46] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #44] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #42] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #40] 
+; VBITS_GE_1024-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #38] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #36] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #34] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #30] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #28] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #26] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #24] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #22] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #20] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #18] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #16] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #14] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #12] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #10] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #8] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #6] +; VBITS_GE_1024-NEXT: strh w10, [sp, #4] +; VBITS_GE_1024-NEXT: strh w11, [sp, #2] +; VBITS_GE_1024-NEXT: strh w8, [sp] +; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.h, z0.h, #0x1 @@ -375,268 +372,267 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i1>* %c) ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: 
.cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr x9, [x2, #8] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr x8, [x2, #8] ; VBITS_GE_2048-NEXT: ptrue p0.h, vl128 ; VBITS_GE_2048-NEXT: ptrue p1.h -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #254] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #252] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #250] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #248] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #246] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #244] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #242] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #238] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #236] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #234] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #232] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #230] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #228] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #226] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #222] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #220] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; 
VBITS_GE_2048-NEXT: strh w10, [sp, #218] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #216] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #214] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #212] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #210] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #206] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #204] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #202] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #200] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #198] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #196] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strh w10, [sp, #194] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #190] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #188] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #186] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #184] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #182] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #180] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #178] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #174] -; 
VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #172] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #170] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #168] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #166] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #164] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #162] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #160] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #158] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #156] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #154] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #152] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #150] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #148] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #146] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #142] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #140] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #138] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #136] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #134] -; VBITS_GE_2048-NEXT: strh w11, [sp, #132] -; VBITS_GE_2048-NEXT: strh w12, [sp, #130] -; VBITS_GE_2048-NEXT: strh w9, [sp, #128] -; VBITS_GE_2048-NEXT: ldr x9, [x2] -; 
VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #126] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #124] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #122] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #120] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #118] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #116] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #114] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #110] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #108] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #106] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #104] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #102] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #100] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #98] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #94] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #92] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #90] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #88] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #86] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; 
VBITS_GE_2048-NEXT: strh w10, [sp, #84] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #82] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #78] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #76] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #74] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #72] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #70] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #68] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strh w10, [sp, #66] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #62] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #60] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #58] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #56] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #54] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #52] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #50] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #46] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #44] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #42] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #40] -; VBITS_GE_2048-NEXT: sbfx 
w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #38] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #36] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #34] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #30] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #28] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #26] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #24] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #22] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #20] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #18] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #16] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #14] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #12] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #10] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #8] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #6] -; VBITS_GE_2048-NEXT: strh w11, [sp, #4] -; VBITS_GE_2048-NEXT: strh w12, [sp, #2] -; VBITS_GE_2048-NEXT: strh w9, [sp] -; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #254] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #252] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: 
strh w11, [sp, #250] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #248] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #246] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #244] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #242] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #238] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #236] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #234] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #232] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #230] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #228] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #226] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #222] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #220] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #218] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #216] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #214] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #212] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #210] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #206] +; VBITS_GE_2048-NEXT: sbfx x9, x8, 
#36, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #204] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #202] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #200] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #198] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #196] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strh w9, [sp, #194] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #190] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #188] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #186] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #184] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #182] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #180] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #178] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #174] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #172] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #170] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #168] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #166] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #164] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #162] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #160] +; 
VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #158] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #156] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #154] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #152] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #150] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #148] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #146] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #142] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #140] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #138] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #136] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #134] +; VBITS_GE_2048-NEXT: strh w10, [sp, #132] +; VBITS_GE_2048-NEXT: strh w11, [sp, #130] +; VBITS_GE_2048-NEXT: strh w8, [sp, #128] +; VBITS_GE_2048-NEXT: ldr x8, [x2] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #126] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #124] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #122] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #120] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #118] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #116] +; VBITS_GE_2048-NEXT: sbfx x11, x8, 
#55, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #114] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #110] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #108] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #106] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #104] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #102] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #100] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #98] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #94] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #92] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #90] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #88] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #86] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #84] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #82] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #78] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #76] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #74] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #72] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #70] +; 
VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #68] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strh w9, [sp, #66] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #64] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #62] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #60] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #58] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #56] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #54] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #52] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #50] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #46] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #44] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #42] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #40] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #38] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #36] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #34] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #30] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #28] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #26] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, 
#24] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #22] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #20] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #18] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #16] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #14] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #12] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #10] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #8] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #6] +; VBITS_GE_2048-NEXT: strh w10, [sp, #4] +; VBITS_GE_2048-NEXT: strh w11, [sp, #2] +; VBITS_GE_2048-NEXT: strh w8, [sp] +; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.h, z0.h, #0x1 @@ -689,23 +685,22 @@ define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrb w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrb w8, [x2] ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: sbfx w10, w9, #7, #1 -; CHECK-NEXT: sbfx w11, w9, #6, #1 -; CHECK-NEXT: sbfx w12, w9, #5, #1 -; CHECK-NEXT: sbfx w13, w9, #4, #1 -; CHECK-NEXT: stp w11, w10, [sp, #24] -; CHECK-NEXT: sbfx w10, w9, #3, #1 -; CHECK-NEXT: sbfx w11, w9, #2, #1 -; CHECK-NEXT: stp w13, w12, [sp, #16] -; CHECK-NEXT: sbfx w12, w9, #1, #1 -; CHECK-NEXT: sbfx w9, w9, #0, #1 -; CHECK-NEXT: stp w11, w10, [sp, #8] -; CHECK-NEXT: stp w9, w12, [sp] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: sbfx w9, w8, #7, #1 +; 
CHECK-NEXT: sbfx w10, w8, #6, #1 +; CHECK-NEXT: sbfx w11, w8, #5, #1 +; CHECK-NEXT: sbfx w12, w8, #4, #1 +; CHECK-NEXT: stp w10, w9, [sp, #24] +; CHECK-NEXT: sbfx w9, w8, #3, #1 +; CHECK-NEXT: sbfx w10, w8, #2, #1 +; CHECK-NEXT: stp w12, w11, [sp, #16] +; CHECK-NEXT: sbfx w11, w8, #1, #1 +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: stp w10, w9, [sp, #8] +; CHECK-NEXT: stp w8, w11, [sp] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] ; CHECK-NEXT: and z0.s, z0.s, #0x1 @@ -733,35 +728,34 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i1>* %c) #0 ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldrh w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldrh w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: ptrue p1.s -; VBITS_GE_512-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_512-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_512-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_512-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_512-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_512-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_512-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_512-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_512-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_512-NEXT: stp w9, 
w12, [sp] -; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_512-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_512-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_512-NEXT: sbfx w9, w8, #11, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_512-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_512-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #32] +; VBITS_GE_512-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_512-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_512-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_512-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_512-NEXT: stp w8, w11, [sp] +; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.s, z0.s, #0x1 @@ -789,59 +783,58 @@ define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i1>* %c) #0 ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldr w9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldr w8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 ; VBITS_GE_1024-NEXT: ptrue p1.s -; VBITS_GE_1024-NEXT: asr w10, w9, #31 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_1024-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #28, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #120] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: sbfx w11, 
w9, #26, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #112] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #24, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #104] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #23, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #22, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #96] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #21, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #20, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #88] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #80] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #16, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #72] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #64] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_1024-NEXT: stp w9, w12, [sp] -; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_1024-NEXT: asr w9, w8, #31 +; VBITS_GE_1024-NEXT: sbfx w10, w8, 
#30, #1 +; VBITS_GE_1024-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #28, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #120] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #112] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #24, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #104] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #23, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #22, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #96] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #21, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #20, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #88] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #80] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #16, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #72] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #64] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #11, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; 
VBITS_GE_1024-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_1024-NEXT: stp w8, w11, [sp] +; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.s, z0.s, #0x1 @@ -869,107 +862,106 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i1>* %c) #0 ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr x9, [x2] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr x8, [x2] ; VBITS_GE_2048-NEXT: ptrue p0.s, vl64 ; VBITS_GE_2048-NEXT: ptrue p1.s -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #60, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #248] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #59, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #58, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #57, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #56, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #232] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #55, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #54, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #53, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #52, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #216] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #48, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #200] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #47, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #46, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #45, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #44, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, 
[sp, #184] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #43, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #42, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #41, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #40, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #168] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #160] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #36, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #152] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #35, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #34, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #33, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #32, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #136] -; VBITS_GE_2048-NEXT: asr w10, w9, #31 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #128] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #28, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #120] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #24, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #104] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #23, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #22, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #21, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #20, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #88] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #16, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #72] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; 
VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_2048-NEXT: stp w9, w12, [sp] -; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #60, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #248] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #59, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #58, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #57, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #56, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #232] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #55, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #54, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #53, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #52, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #216] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; 
VBITS_GE_2048-NEXT: stp w12, w11, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #48, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #200] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #47, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #46, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #45, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #44, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #184] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #43, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #42, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #41, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #40, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #168] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #36, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #152] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #35, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #34, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #33, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #32, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #136] +; VBITS_GE_2048-NEXT: asr w9, w8, #31 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #30, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #128] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #28, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #120] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #24, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #104] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #23, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #22, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #21, 
#1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #20, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #88] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #16, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #72] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #64] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #11, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_2048-NEXT: stp w8, w11, [sp] +; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.s, z0.s, #0x1 @@ -1023,20 +1015,19 @@ define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrb w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrb w8, [x2] ; CHECK-NEXT: ptrue p0.d, vl4 ; 
CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: lsr w10, w9, #3 -; CHECK-NEXT: lsr w11, w9, #2 -; CHECK-NEXT: sbfx x12, x9, #0, #1 -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: sbfx x10, x10, #0, #1 -; CHECK-NEXT: sbfx x11, x11, #0, #1 +; CHECK-NEXT: lsr w9, w8, #3 +; CHECK-NEXT: lsr w10, w8, #2 +; CHECK-NEXT: sbfx x11, x8, #0, #1 +; CHECK-NEXT: lsr w8, w8, #1 ; CHECK-NEXT: sbfx x9, x9, #0, #1 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: stp x12, x9, [sp] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] +; CHECK-NEXT: sbfx x10, x10, #0, #1 +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: stp x10, x9, [sp, #16] +; CHECK-NEXT: stp x11, x8, [sp] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] ; CHECK-NEXT: and z0.d, z0.d, #0x1 @@ -1064,30 +1055,29 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldrb w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldrb w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d -; VBITS_GE_512-NEXT: lsr w10, w9, #7 -; VBITS_GE_512-NEXT: lsr w11, w9, #6 -; VBITS_GE_512-NEXT: lsr w12, w9, #5 -; VBITS_GE_512-NEXT: lsr w13, w9, #4 +; VBITS_GE_512-NEXT: lsr w9, w8, #7 +; VBITS_GE_512-NEXT: lsr w10, w8, #6 +; VBITS_GE_512-NEXT: lsr w11, w8, #5 +; VBITS_GE_512-NEXT: lsr w12, w8, #4 +; VBITS_GE_512-NEXT: sbfx x9, x9, #0, #1 ; VBITS_GE_512-NEXT: sbfx x10, x10, #0, #1 ; VBITS_GE_512-NEXT: sbfx x11, x11, #0, #1 ; VBITS_GE_512-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_512-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_512-NEXT: lsr w14, w9, #3 -; VBITS_GE_512-NEXT: stp x11, x10, [sp, #48] -; VBITS_GE_512-NEXT: lsr w10, w9, #2 -; VBITS_GE_512-NEXT: stp x13, x12, [sp, #32] -; VBITS_GE_512-NEXT: sbfx x12, x9, #0, #1 -; VBITS_GE_512-NEXT: lsr w9, w9, #1 -; VBITS_GE_512-NEXT: sbfx x11, 
x14, #0, #1 -; VBITS_GE_512-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_512-NEXT: lsr w13, w8, #3 +; VBITS_GE_512-NEXT: stp x10, x9, [sp, #48] +; VBITS_GE_512-NEXT: lsr w9, w8, #2 +; VBITS_GE_512-NEXT: stp x12, x11, [sp, #32] +; VBITS_GE_512-NEXT: sbfx x11, x8, #0, #1 +; VBITS_GE_512-NEXT: lsr w8, w8, #1 +; VBITS_GE_512-NEXT: sbfx x10, x13, #0, #1 ; VBITS_GE_512-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_512-NEXT: stp x10, x11, [sp, #16] -; VBITS_GE_512-NEXT: stp x12, x9, [sp] -; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_512-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_512-NEXT: stp x9, x10, [sp, #16] +; VBITS_GE_512-NEXT: stp x11, x8, [sp] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.d, z0.d, #0x1 @@ -1115,50 +1105,49 @@ define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i1>* %c) ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldrh w9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldrh w8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d -; VBITS_GE_1024-NEXT: lsr w10, w9, #15 -; VBITS_GE_1024-NEXT: lsr w11, w9, #14 -; VBITS_GE_1024-NEXT: lsr w12, w9, #13 -; VBITS_GE_1024-NEXT: lsr w13, w9, #12 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: lsr w14, w9, #11 -; VBITS_GE_1024-NEXT: lsr w15, w9, #10 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #112] -; VBITS_GE_1024-NEXT: lsr w10, w9, #9 -; VBITS_GE_1024-NEXT: stp x13, x12, [sp, #96] -; VBITS_GE_1024-NEXT: lsr w13, w9, #8 -; VBITS_GE_1024-NEXT: sbfx x11, x14, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x15, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx 
x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: lsr w14, w9, #3 -; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #80] -; VBITS_GE_1024-NEXT: lsr w11, w9, #6 -; VBITS_GE_1024-NEXT: stp x13, x10, [sp, #64] -; VBITS_GE_1024-NEXT: lsr w10, w9, #7 -; VBITS_GE_1024-NEXT: lsr w12, w9, #5 -; VBITS_GE_1024-NEXT: lsr w13, w9, #4 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #48] -; VBITS_GE_1024-NEXT: lsr w11, w9, #2 -; VBITS_GE_1024-NEXT: stp x13, x12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #0, #1 -; VBITS_GE_1024-NEXT: lsr w9, w9, #1 -; VBITS_GE_1024-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: lsr w9, w8, #15 +; VBITS_GE_1024-NEXT: lsr w10, w8, #14 +; VBITS_GE_1024-NEXT: lsr w11, w8, #13 +; VBITS_GE_1024-NEXT: lsr w12, w8, #12 ; VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #16] -; VBITS_GE_1024-NEXT: stp x12, x9, [sp] -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: lsr w13, w8, #11 +; VBITS_GE_1024-NEXT: lsr w14, w8, #10 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #112] +; VBITS_GE_1024-NEXT: lsr w9, w8, #9 +; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #96] +; VBITS_GE_1024-NEXT: lsr w12, w8, #8 +; VBITS_GE_1024-NEXT: sbfx x10, x13, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x14, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: lsr w13, w8, #3 +; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #80] +; VBITS_GE_1024-NEXT: lsr w10, w8, #6 +; VBITS_GE_1024-NEXT: stp x12, x9, [sp, #64] +; VBITS_GE_1024-NEXT: lsr w9, w8, #7 +; VBITS_GE_1024-NEXT: lsr w11, w8, #5 +; VBITS_GE_1024-NEXT: lsr w12, w8, #4 +; 
VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #48] +; VBITS_GE_1024-NEXT: lsr w10, w8, #2 +; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #0, #1 +; VBITS_GE_1024-NEXT: lsr w8, w8, #1 +; VBITS_GE_1024-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #16] +; VBITS_GE_1024-NEXT: stp x11, x8, [sp] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.d, z0.d, #0x1 @@ -1186,121 +1175,120 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i1>* %c) ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr w9, [x2] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr w8, [x2] ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d -; VBITS_GE_2048-NEXT: ubfx x10, x9, #31, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #30, #2 +; VBITS_GE_2048-NEXT: ubfx x9, x8, #31, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #30, #2 +; VBITS_GE_2048-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #29, #3 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #28, #4 +; VBITS_GE_2048-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #27, #5 +; VBITS_GE_2048-NEXT: ubfx x14, x8, #26, #6 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 
killed $x13 def $x13 +; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x12, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #25, #7 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #23, #9 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x11, x9, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x9, x14, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #24, #8 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x9, x10, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #22, #10 +; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #21, #11 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x11, x9, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx x9, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #20, #12 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #19, #13 ; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #29, #3 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #28, #4 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 ; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #18, #14 ; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 ; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #17, #15 ; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; 
VBITS_GE_2048-NEXT: ubfx x14, x9, #27, #5 -; VBITS_GE_2048-NEXT: ubfx x15, x9, #26, #6 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: // kill: def $w15 killed $w15 killed $x15 def $x15 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x14, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #25, #7 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #23, #9 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x12, x10, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx x10, x15, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #24, #8 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #16, #16 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #15, #17 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x10, x11, #0, #1 +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #14, #18 ; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #22, #10 -; VBITS_GE_2048-NEXT: sbfx x13, x14, #0, #1 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #21, #11 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x12, x10, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx x10, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #20, #12 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #19, #13 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #13, #19 +; VBITS_GE_2048-NEXT: // kill: def $w13 
killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #128] +; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #12, #20 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #11, #21 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #10, #22 ; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #18, #14 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #9, #23 ; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #17, #15 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #160] +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #8, #24 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #7, #25 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #6, #26 +; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #5, #27 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #64] 
+; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #4, #28 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #3, #29 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #2, #30 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #1, #31 +; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 ; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #16, #16 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #15, #17 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: stp x10, x12, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #14, #18 -; VBITS_GE_2048-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #13, #19 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #128] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #12, #20 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #11, #21 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, 
x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #10, #22 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #9, #23 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #8, #24 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #7, #25 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: stp x10, x12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #6, #26 -; VBITS_GE_2048-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #5, #27 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #4, #28 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #3, #29 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #2, #30 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #1, #31 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x14, #0, #1 -; VBITS_GE_2048-NEXT: 
stp x10, x12, [sp, #16] -; VBITS_GE_2048-NEXT: stp x9, x11, [sp] -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #16] +; VBITS_GE_2048-NEXT: stp x8, x10, [sp] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.d, z0.d, #0x1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll index 4433a31797db..86cef17f1dd9 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll @@ -54,75 +54,74 @@ define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, <32 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldr w8, [x2] ; CHECK-NEXT: ptrue p0.b, vl32 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: asr w10, w9, #31 -; CHECK-NEXT: sbfx w11, w9, #30, #1 -; CHECK-NEXT: sbfx w12, w9, #29, #1 -; CHECK-NEXT: strb w10, [sp, #31] -; CHECK-NEXT: sbfx w10, w9, #28, #1 -; CHECK-NEXT: strb w11, [sp, #30] -; CHECK-NEXT: sbfx w11, w9, #27, #1 -; CHECK-NEXT: strb w12, [sp, #29] -; CHECK-NEXT: sbfx w12, w9, #26, #1 -; CHECK-NEXT: strb w10, [sp, #28] -; CHECK-NEXT: sbfx w10, w9, #25, #1 -; CHECK-NEXT: strb w11, [sp, #27] -; CHECK-NEXT: sbfx w11, w9, #24, #1 -; CHECK-NEXT: strb w12, [sp, #26] -; CHECK-NEXT: sbfx w12, w9, #23, #1 -; CHECK-NEXT: strb w10, [sp, #25] -; CHECK-NEXT: sbfx w10, w9, #22, #1 -; CHECK-NEXT: strb w11, [sp, #24] -; CHECK-NEXT: sbfx w11, w9, #21, #1 -; CHECK-NEXT: strb w12, [sp, #23] -; CHECK-NEXT: sbfx w12, w9, #20, #1 -; CHECK-NEXT: strb w10, [sp, #22] -; CHECK-NEXT: sbfx w10, w9, #19, #1 -; CHECK-NEXT: strb w11, [sp, #21] -; CHECK-NEXT: sbfx w11, w9, #18, #1 -; CHECK-NEXT: strb w12, [sp, #20] -; CHECK-NEXT: sbfx w12, w9, #17, #1 -; 
CHECK-NEXT: strb w10, [sp, #19] -; CHECK-NEXT: sbfx w10, w9, #16, #1 -; CHECK-NEXT: strb w11, [sp, #18] -; CHECK-NEXT: sbfx w11, w9, #15, #1 -; CHECK-NEXT: strb w12, [sp, #17] -; CHECK-NEXT: sbfx w12, w9, #14, #1 -; CHECK-NEXT: strb w10, [sp, #16] -; CHECK-NEXT: sbfx w10, w9, #13, #1 -; CHECK-NEXT: strb w11, [sp, #15] -; CHECK-NEXT: sbfx w11, w9, #12, #1 -; CHECK-NEXT: strb w12, [sp, #14] -; CHECK-NEXT: sbfx w12, w9, #11, #1 -; CHECK-NEXT: strb w10, [sp, #13] -; CHECK-NEXT: sbfx w10, w9, #10, #1 -; CHECK-NEXT: strb w11, [sp, #12] -; CHECK-NEXT: sbfx w11, w9, #9, #1 -; CHECK-NEXT: strb w12, [sp, #11] -; CHECK-NEXT: sbfx w12, w9, #8, #1 -; CHECK-NEXT: strb w10, [sp, #10] -; CHECK-NEXT: sbfx w10, w9, #7, #1 -; CHECK-NEXT: strb w11, [sp, #9] -; CHECK-NEXT: sbfx w11, w9, #6, #1 -; CHECK-NEXT: strb w12, [sp, #8] -; CHECK-NEXT: sbfx w12, w9, #5, #1 -; CHECK-NEXT: strb w10, [sp, #7] -; CHECK-NEXT: sbfx w10, w9, #4, #1 -; CHECK-NEXT: strb w11, [sp, #6] -; CHECK-NEXT: sbfx w11, w9, #3, #1 -; CHECK-NEXT: strb w12, [sp, #5] -; CHECK-NEXT: sbfx w12, w9, #2, #1 -; CHECK-NEXT: strb w10, [sp, #4] -; CHECK-NEXT: sbfx w10, w9, #1, #1 -; CHECK-NEXT: sbfx w9, w9, #0, #1 -; CHECK-NEXT: strb w11, [sp, #3] -; CHECK-NEXT: strb w12, [sp, #2] -; CHECK-NEXT: strb w10, [sp, #1] -; CHECK-NEXT: strb w9, [sp] -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8] +; CHECK-NEXT: asr w9, w8, #31 +; CHECK-NEXT: sbfx w10, w8, #30, #1 +; CHECK-NEXT: sbfx w11, w8, #29, #1 +; CHECK-NEXT: strb w9, [sp, #31] +; CHECK-NEXT: sbfx w9, w8, #28, #1 +; CHECK-NEXT: strb w10, [sp, #30] +; CHECK-NEXT: sbfx w10, w8, #27, #1 +; CHECK-NEXT: strb w11, [sp, #29] +; CHECK-NEXT: sbfx w11, w8, #26, #1 +; CHECK-NEXT: strb w9, [sp, #28] +; CHECK-NEXT: sbfx w9, w8, #25, #1 +; CHECK-NEXT: strb w10, [sp, #27] +; CHECK-NEXT: sbfx w10, w8, #24, #1 +; CHECK-NEXT: strb w11, [sp, #26] +; CHECK-NEXT: sbfx w11, w8, #23, #1 +; CHECK-NEXT: strb w9, [sp, #25] +; CHECK-NEXT: sbfx w9, w8, #22, #1 +; CHECK-NEXT: strb w10, [sp, #24] +; CHECK-NEXT: sbfx 
w10, w8, #21, #1 +; CHECK-NEXT: strb w11, [sp, #23] +; CHECK-NEXT: sbfx w11, w8, #20, #1 +; CHECK-NEXT: strb w9, [sp, #22] +; CHECK-NEXT: sbfx w9, w8, #19, #1 +; CHECK-NEXT: strb w10, [sp, #21] +; CHECK-NEXT: sbfx w10, w8, #18, #1 +; CHECK-NEXT: strb w11, [sp, #20] +; CHECK-NEXT: sbfx w11, w8, #17, #1 +; CHECK-NEXT: strb w9, [sp, #19] +; CHECK-NEXT: sbfx w9, w8, #16, #1 +; CHECK-NEXT: strb w10, [sp, #18] +; CHECK-NEXT: sbfx w10, w8, #15, #1 +; CHECK-NEXT: strb w11, [sp, #17] +; CHECK-NEXT: sbfx w11, w8, #14, #1 +; CHECK-NEXT: strb w9, [sp, #16] +; CHECK-NEXT: sbfx w9, w8, #13, #1 +; CHECK-NEXT: strb w10, [sp, #15] +; CHECK-NEXT: sbfx w10, w8, #12, #1 +; CHECK-NEXT: strb w11, [sp, #14] +; CHECK-NEXT: sbfx w11, w8, #11, #1 +; CHECK-NEXT: strb w9, [sp, #13] +; CHECK-NEXT: sbfx w9, w8, #10, #1 +; CHECK-NEXT: strb w10, [sp, #12] +; CHECK-NEXT: sbfx w10, w8, #9, #1 +; CHECK-NEXT: strb w11, [sp, #11] +; CHECK-NEXT: sbfx w11, w8, #8, #1 +; CHECK-NEXT: strb w9, [sp, #10] +; CHECK-NEXT: sbfx w9, w8, #7, #1 +; CHECK-NEXT: strb w10, [sp, #9] +; CHECK-NEXT: sbfx w10, w8, #6, #1 +; CHECK-NEXT: strb w11, [sp, #8] +; CHECK-NEXT: sbfx w11, w8, #5, #1 +; CHECK-NEXT: strb w9, [sp, #7] +; CHECK-NEXT: sbfx w9, w8, #4, #1 +; CHECK-NEXT: strb w10, [sp, #6] +; CHECK-NEXT: sbfx w10, w8, #3, #1 +; CHECK-NEXT: strb w11, [sp, #5] +; CHECK-NEXT: sbfx w11, w8, #2, #1 +; CHECK-NEXT: strb w9, [sp, #4] +; CHECK-NEXT: sbfx w9, w8, #1, #1 +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: strb w10, [sp, #3] +; CHECK-NEXT: strb w11, [sp, #2] +; CHECK-NEXT: strb w9, [sp, #1] +; CHECK-NEXT: strb w8, [sp] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z2.b }, p0/z, [x1] ; CHECK-NEXT: and z0.b, z0.b, #0x1 @@ -150,139 +149,138 @@ define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, <64 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldr 
x9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldr x8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.b, vl64 ; VBITS_GE_512-NEXT: ptrue p1.b -; VBITS_GE_512-NEXT: asr x10, x9, #63 -; VBITS_GE_512-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_512-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #63] -; VBITS_GE_512-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #62] -; VBITS_GE_512-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #61] -; VBITS_GE_512-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #60] -; VBITS_GE_512-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #59] -; VBITS_GE_512-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #58] -; VBITS_GE_512-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #57] -; VBITS_GE_512-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #56] -; VBITS_GE_512-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #55] -; VBITS_GE_512-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #54] -; VBITS_GE_512-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #53] -; VBITS_GE_512-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #52] -; VBITS_GE_512-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #51] -; VBITS_GE_512-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #50] -; VBITS_GE_512-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #49] -; VBITS_GE_512-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #48] -; VBITS_GE_512-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #47] -; VBITS_GE_512-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #46] -; VBITS_GE_512-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #45] -; VBITS_GE_512-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #44] -; VBITS_GE_512-NEXT: sbfx x11, x9, #41, #1 -; 
VBITS_GE_512-NEXT: strb w12, [sp, #43] -; VBITS_GE_512-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #42] -; VBITS_GE_512-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #41] -; VBITS_GE_512-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #40] -; VBITS_GE_512-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #39] -; VBITS_GE_512-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #38] -; VBITS_GE_512-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #37] -; VBITS_GE_512-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #36] -; VBITS_GE_512-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #35] -; VBITS_GE_512-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #34] -; VBITS_GE_512-NEXT: asr w12, w9, #31 -; VBITS_GE_512-NEXT: strb w10, [sp, #33] -; VBITS_GE_512-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #32] -; VBITS_GE_512-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #31] -; VBITS_GE_512-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #30] -; VBITS_GE_512-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #29] -; VBITS_GE_512-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #28] -; VBITS_GE_512-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #27] -; VBITS_GE_512-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #26] -; VBITS_GE_512-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #25] -; VBITS_GE_512-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #24] -; VBITS_GE_512-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #23] -; VBITS_GE_512-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #22] -; VBITS_GE_512-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #21] -; VBITS_GE_512-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_512-NEXT: strb 
w11, [sp, #20] -; VBITS_GE_512-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #19] -; VBITS_GE_512-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #18] -; VBITS_GE_512-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #17] -; VBITS_GE_512-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #16] -; VBITS_GE_512-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #15] -; VBITS_GE_512-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #14] -; VBITS_GE_512-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #13] -; VBITS_GE_512-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #12] -; VBITS_GE_512-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #11] -; VBITS_GE_512-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #10] -; VBITS_GE_512-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #9] -; VBITS_GE_512-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #8] -; VBITS_GE_512-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #7] -; VBITS_GE_512-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #6] -; VBITS_GE_512-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_512-NEXT: strb w11, [sp, #5] -; VBITS_GE_512-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_512-NEXT: strb w12, [sp, #4] -; VBITS_GE_512-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_512-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_512-NEXT: strb w10, [sp, #3] -; VBITS_GE_512-NEXT: strb w11, [sp, #2] -; VBITS_GE_512-NEXT: strb w12, [sp, #1] -; VBITS_GE_512-NEXT: strb w9, [sp] -; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x8] +; VBITS_GE_512-NEXT: asr x9, x8, #63 +; VBITS_GE_512-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_512-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #63] +; VBITS_GE_512-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #62] +; VBITS_GE_512-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_512-NEXT: strb w11, 
[sp, #61] +; VBITS_GE_512-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #60] +; VBITS_GE_512-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #59] +; VBITS_GE_512-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #58] +; VBITS_GE_512-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #57] +; VBITS_GE_512-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #56] +; VBITS_GE_512-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #55] +; VBITS_GE_512-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #54] +; VBITS_GE_512-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #53] +; VBITS_GE_512-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #52] +; VBITS_GE_512-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #51] +; VBITS_GE_512-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #50] +; VBITS_GE_512-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #49] +; VBITS_GE_512-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #48] +; VBITS_GE_512-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #47] +; VBITS_GE_512-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #46] +; VBITS_GE_512-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #45] +; VBITS_GE_512-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #44] +; VBITS_GE_512-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #43] +; VBITS_GE_512-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #42] +; VBITS_GE_512-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #41] +; VBITS_GE_512-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #40] +; VBITS_GE_512-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #39] +; VBITS_GE_512-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #38] +; VBITS_GE_512-NEXT: sbfx 
x10, x8, #35, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #37] +; VBITS_GE_512-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #36] +; VBITS_GE_512-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #35] +; VBITS_GE_512-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #34] +; VBITS_GE_512-NEXT: asr w11, w8, #31 +; VBITS_GE_512-NEXT: strb w9, [sp, #33] +; VBITS_GE_512-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #32] +; VBITS_GE_512-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #31] +; VBITS_GE_512-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #30] +; VBITS_GE_512-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #29] +; VBITS_GE_512-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #28] +; VBITS_GE_512-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #27] +; VBITS_GE_512-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #26] +; VBITS_GE_512-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #25] +; VBITS_GE_512-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #24] +; VBITS_GE_512-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #23] +; VBITS_GE_512-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #22] +; VBITS_GE_512-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #21] +; VBITS_GE_512-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #20] +; VBITS_GE_512-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #19] +; VBITS_GE_512-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #18] +; VBITS_GE_512-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #17] +; VBITS_GE_512-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #16] +; VBITS_GE_512-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #15] +; VBITS_GE_512-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_512-NEXT: 
strb w10, [sp, #14] +; VBITS_GE_512-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #13] +; VBITS_GE_512-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #12] +; VBITS_GE_512-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #11] +; VBITS_GE_512-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #10] +; VBITS_GE_512-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #9] +; VBITS_GE_512-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #8] +; VBITS_GE_512-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #7] +; VBITS_GE_512-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #6] +; VBITS_GE_512-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_512-NEXT: strb w10, [sp, #5] +; VBITS_GE_512-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_512-NEXT: strb w11, [sp, #4] +; VBITS_GE_512-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_512-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_512-NEXT: strb w9, [sp, #3] +; VBITS_GE_512-NEXT: strb w10, [sp, #2] +; VBITS_GE_512-NEXT: strb w11, [sp, #1] +; VBITS_GE_512-NEXT: strb w8, [sp] +; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1b { z2.b }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.b, z0.b, #0x1 @@ -310,268 +308,267 @@ define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, <128 x i1>* %c) #0 { ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldr x9, [x2, #8] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldr x8, [x2, #8] ; VBITS_GE_1024-NEXT: ptrue p0.b, vl128 ; VBITS_GE_1024-NEXT: ptrue p1.b -; VBITS_GE_1024-NEXT: asr x10, x9, #63 -; VBITS_GE_1024-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #127] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #126] -; VBITS_GE_1024-NEXT: sbfx 
x11, x9, #59, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #125] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #124] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #123] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #122] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #121] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #120] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #119] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #118] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #117] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #116] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #115] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #114] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #113] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #112] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #111] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #110] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #109] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #108] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #107] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #106] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #105] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #104] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_1024-NEXT: 
strb w10, [sp, #103] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #102] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #101] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #100] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #99] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #98] -; VBITS_GE_1024-NEXT: asr w12, w9, #31 -; VBITS_GE_1024-NEXT: strb w10, [sp, #97] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #96] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #95] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #94] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #93] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #92] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #91] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #90] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #89] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #88] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #87] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #86] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #85] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #84] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #83] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #82] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #81] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #14, #1 
-; VBITS_GE_1024-NEXT: strb w12, [sp, #80] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #79] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #78] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #77] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #76] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #75] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #74] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #73] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #72] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #71] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #70] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #69] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #68] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #67] -; VBITS_GE_1024-NEXT: strb w11, [sp, #66] -; VBITS_GE_1024-NEXT: strb w12, [sp, #65] -; VBITS_GE_1024-NEXT: strb w9, [sp, #64] -; VBITS_GE_1024-NEXT: ldr x9, [x2] -; VBITS_GE_1024-NEXT: asr x10, x9, #63 -; VBITS_GE_1024-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #63] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #62] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #61] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #60] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #59] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #58] 
-; VBITS_GE_1024-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #57] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #56] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #55] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #54] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #53] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #52] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #51] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #50] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #49] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #48] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #47] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #46] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #45] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #44] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #43] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #42] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #41] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #40] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #39] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #38] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #37] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #36] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #33, #1 -; 
VBITS_GE_1024-NEXT: strb w11, [sp, #35] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #34] -; VBITS_GE_1024-NEXT: asr w12, w9, #31 -; VBITS_GE_1024-NEXT: strb w10, [sp, #33] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #31] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #30] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #29] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #28] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #27] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #26] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #25] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #24] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #23] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #22] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #21] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #20] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #19] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #18] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #17] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #16] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #15] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #14] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #13] -; VBITS_GE_1024-NEXT: sbfx 
w12, w9, #10, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #12] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #11] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #10] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #9] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #8] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #7] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #6] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: strb w11, [sp, #5] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: strb w12, [sp, #4] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: strb w10, [sp, #3] -; VBITS_GE_1024-NEXT: strb w11, [sp, #2] -; VBITS_GE_1024-NEXT: strb w12, [sp, #1] -; VBITS_GE_1024-NEXT: strb w9, [sp] -; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x8] +; VBITS_GE_1024-NEXT: asr x9, x8, #63 +; VBITS_GE_1024-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #127] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #126] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #125] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #124] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #123] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #122] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #121] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #120] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #119] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_1024-NEXT: 
strb w9, [sp, #118] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #117] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #116] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #115] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #114] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #113] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #112] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #111] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #110] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #109] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #108] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #107] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #106] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #105] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #104] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #103] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #102] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #101] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #100] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #99] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #98] +; VBITS_GE_1024-NEXT: asr w11, w8, #31 +; VBITS_GE_1024-NEXT: strb w9, [sp, #97] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #96] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #29, #1 
+; VBITS_GE_1024-NEXT: strb w11, [sp, #95] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #94] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #93] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #92] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #91] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #90] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #89] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #88] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #87] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #86] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #85] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #84] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #83] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #82] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #81] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #80] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #79] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #78] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #77] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #76] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #75] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #74] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #73] +; VBITS_GE_1024-NEXT: sbfx w9, w8, 
#6, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #72] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #71] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #70] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #69] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #68] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #67] +; VBITS_GE_1024-NEXT: strb w10, [sp, #66] +; VBITS_GE_1024-NEXT: strb w11, [sp, #65] +; VBITS_GE_1024-NEXT: strb w8, [sp, #64] +; VBITS_GE_1024-NEXT: ldr x8, [x2] +; VBITS_GE_1024-NEXT: asr x9, x8, #63 +; VBITS_GE_1024-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #63] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #62] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #61] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #60] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #59] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #58] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #57] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #56] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #55] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #54] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #53] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #52] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #51] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #50] +; 
VBITS_GE_1024-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #49] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #48] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #47] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #46] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #45] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #44] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #43] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #42] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #41] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #40] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #39] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #38] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #37] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #36] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #35] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #34] +; VBITS_GE_1024-NEXT: asr w11, w8, #31 +; VBITS_GE_1024-NEXT: strb w9, [sp, #33] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #31] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #30] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #29] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #28] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, 
#27] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #26] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #25] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #24] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #23] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #22] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #21] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #20] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #19] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #18] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #17] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #16] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #15] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #14] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #13] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #12] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #11] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #10] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #9] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #8] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #7] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #6] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: strb w10, [sp, #5] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: strb w11, [sp, #4] 
+; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_1024-NEXT: strb w9, [sp, #3] +; VBITS_GE_1024-NEXT: strb w10, [sp, #2] +; VBITS_GE_1024-NEXT: strb w11, [sp, #1] +; VBITS_GE_1024-NEXT: strb w8, [sp] +; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1b { z2.b }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.b, z0.b, #0x1 @@ -599,526 +596,525 @@ define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, <256 x i1>* %c) #0 { ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr x9, [x2, #24] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr x8, [x2, #24] ; VBITS_GE_2048-NEXT: ptrue p0.b, vl256 ; VBITS_GE_2048-NEXT: ptrue p1.b -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #255] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #254] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #253] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #252] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #251] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #250] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #249] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #248] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #247] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #246] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #245] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strb w12, 
[sp, #244] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #243] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #242] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #241] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #239] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #238] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #237] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #236] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #235] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #234] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #233] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #232] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #231] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #230] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #229] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #228] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #227] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #226] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strb w10, [sp, #225] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #223] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #222] -; VBITS_GE_2048-NEXT: sbfx w10, 
w9, #27, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #221] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #220] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #219] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #218] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #217] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #216] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #215] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #214] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #213] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #212] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #211] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #210] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #209] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #207] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #206] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #205] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #204] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #203] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #202] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #201] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #200] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strb w12, 
[sp, #199] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #198] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #197] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #196] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #195] -; VBITS_GE_2048-NEXT: strb w11, [sp, #194] -; VBITS_GE_2048-NEXT: strb w12, [sp, #193] -; VBITS_GE_2048-NEXT: strb w9, [sp, #192] -; VBITS_GE_2048-NEXT: ldr x9, [x2, #16] -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #191] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #190] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #189] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #188] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #187] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #186] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #185] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #184] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #183] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #182] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #181] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #180] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #179] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #178] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #177] 
-; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #175] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #174] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #173] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #172] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #171] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #170] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #169] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #168] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #167] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #166] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #165] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #164] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #163] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #162] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strb w10, [sp, #161] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #160] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #159] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #158] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #157] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #156] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #155] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 
-; VBITS_GE_2048-NEXT: strb w11, [sp, #154] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #153] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #152] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #151] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #150] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #149] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #148] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #147] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #146] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #145] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #143] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #142] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #141] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #140] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #139] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #138] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #137] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #136] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #135] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #134] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #133] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #132] -; 
VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #131] -; VBITS_GE_2048-NEXT: strb w11, [sp, #130] -; VBITS_GE_2048-NEXT: strb w12, [sp, #129] -; VBITS_GE_2048-NEXT: strb w9, [sp, #128] -; VBITS_GE_2048-NEXT: ldr x9, [x2, #8] -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #127] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #126] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #125] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #124] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #123] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #122] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #121] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #120] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #119] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #118] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #117] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #116] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #115] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #114] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #113] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #111] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #110] -; 
VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #109] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #108] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #107] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #106] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #105] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #104] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #103] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #102] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #101] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #100] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #99] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #98] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strb w10, [sp, #97] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #95] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #94] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #93] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #92] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #91] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #90] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #89] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #88] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; 
VBITS_GE_2048-NEXT: strb w11, [sp, #87] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #86] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #85] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #84] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #83] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #82] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #81] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #79] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #78] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #77] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #76] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #75] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #74] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #73] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #72] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #71] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #70] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #69] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #68] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #67] -; VBITS_GE_2048-NEXT: strb w11, [sp, #66] -; VBITS_GE_2048-NEXT: strb w12, [sp, #65] -; VBITS_GE_2048-NEXT: strb w9, [sp, #64] -; VBITS_GE_2048-NEXT: ldr x9, [x2] 
-; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #63] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #62] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #61] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #60] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #59] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #58] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #57] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #56] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #55] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #54] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #53] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #52] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #51] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #50] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #49] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #47] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #46] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #45] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #44] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #43] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; 
VBITS_GE_2048-NEXT: strb w10, [sp, #42] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #41] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #40] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #39] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #38] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #37] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #36] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #35] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #34] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strb w10, [sp, #33] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #31] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #30] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #29] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #28] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #27] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #26] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #25] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #24] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #23] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #22] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #21] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #20] -; VBITS_GE_2048-NEXT: sbfx 
w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #19] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #18] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #17] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #16] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #15] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #14] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #13] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #12] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #11] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #10] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #9] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #8] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #7] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #6] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strb w11, [sp, #5] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strb w12, [sp, #4] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strb w10, [sp, #3] -; VBITS_GE_2048-NEXT: strb w11, [sp, #2] -; VBITS_GE_2048-NEXT: strb w12, [sp, #1] -; VBITS_GE_2048-NEXT: strb w9, [sp] -; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #255] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #254] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strb 
w11, [sp, #253] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #252] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #251] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #250] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #249] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #248] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #247] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #246] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #245] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #244] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #243] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #242] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #241] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #239] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #238] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #237] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #236] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #235] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #234] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #233] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #232] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #231] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, 
#1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #230] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #229] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #228] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #227] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #226] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strb w9, [sp, #225] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #223] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #222] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #221] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #220] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #219] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #218] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #217] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #216] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #215] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #214] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #213] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #212] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #211] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #210] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #209] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #208] +; 
VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #207] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #206] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #205] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #204] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #203] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #202] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #201] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #200] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #199] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #198] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #197] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #196] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #195] +; VBITS_GE_2048-NEXT: strb w10, [sp, #194] +; VBITS_GE_2048-NEXT: strb w11, [sp, #193] +; VBITS_GE_2048-NEXT: strb w8, [sp, #192] +; VBITS_GE_2048-NEXT: ldr x8, [x2, #16] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #191] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #190] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #189] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #188] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #187] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #186] +; VBITS_GE_2048-NEXT: sbfx x11, 
x8, #55, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #185] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #184] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #183] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #182] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #181] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #180] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #179] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #178] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #177] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #175] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #174] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #173] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #172] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #171] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #170] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #169] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #168] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #167] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #166] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #165] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #164] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #163] 
+; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #162] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strb w9, [sp, #161] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #159] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #158] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #157] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #156] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #155] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #154] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #153] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #152] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #151] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #150] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #149] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #148] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #147] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #146] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #145] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #143] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #142] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #141] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; 
VBITS_GE_2048-NEXT: strb w9, [sp, #140] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #139] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #138] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #137] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #136] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #135] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #134] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #133] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #132] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #131] +; VBITS_GE_2048-NEXT: strb w10, [sp, #130] +; VBITS_GE_2048-NEXT: strb w11, [sp, #129] +; VBITS_GE_2048-NEXT: strb w8, [sp, #128] +; VBITS_GE_2048-NEXT: ldr x8, [x2, #8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #127] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #126] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #125] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #124] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #123] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #122] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #121] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #120] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #119] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strb w9, 
[sp, #118] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #117] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #116] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #115] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #114] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #113] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #111] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #110] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #109] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #108] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #107] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #106] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #105] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #104] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #103] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #102] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #101] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #100] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #99] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #98] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strb w9, [sp, #97] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; 
VBITS_GE_2048-NEXT: strb w11, [sp, #95] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #94] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #93] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #92] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #91] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #90] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #89] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #88] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #87] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #86] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #85] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #84] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #83] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #82] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #81] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #79] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #78] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #77] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #76] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #75] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #74] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #73] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, 
#1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #72] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #71] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #70] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #69] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #68] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #67] +; VBITS_GE_2048-NEXT: strb w10, [sp, #66] +; VBITS_GE_2048-NEXT: strb w11, [sp, #65] +; VBITS_GE_2048-NEXT: strb w8, [sp, #64] +; VBITS_GE_2048-NEXT: ldr x8, [x2] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #63] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #62] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #61] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #60] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #59] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #58] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #57] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #56] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #55] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #54] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #53] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #52] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #51] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #50] +; 
VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #49] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #47] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #46] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #45] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #44] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #43] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #42] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #41] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #40] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #39] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #38] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #37] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #36] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #35] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #34] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strb w9, [sp, #33] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #31] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #30] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #29] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #28] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, 
#27] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #26] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #25] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #24] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #23] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #22] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #21] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #20] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #19] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #18] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #17] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #16] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #15] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #14] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #13] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #12] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #11] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #10] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #9] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #8] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #7] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #6] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strb w10, [sp, #5] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strb w11, [sp, #4] 
+; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strb w9, [sp, #3] +; VBITS_GE_2048-NEXT: strb w10, [sp, #2] +; VBITS_GE_2048-NEXT: strb w11, [sp, #1] +; VBITS_GE_2048-NEXT: strb w8, [sp] +; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1b { z2.b }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.b, z0.b, #0x1 @@ -1171,43 +1167,42 @@ define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, <16 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrh w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrh w8, [x2] ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: sbfx w10, w9, #15, #1 -; CHECK-NEXT: sbfx w11, w9, #14, #1 -; CHECK-NEXT: sbfx w12, w9, #13, #1 -; CHECK-NEXT: strh w10, [sp, #30] -; CHECK-NEXT: sbfx w10, w9, #12, #1 -; CHECK-NEXT: strh w11, [sp, #28] -; CHECK-NEXT: sbfx w11, w9, #11, #1 -; CHECK-NEXT: strh w12, [sp, #26] -; CHECK-NEXT: sbfx w12, w9, #10, #1 -; CHECK-NEXT: strh w10, [sp, #24] -; CHECK-NEXT: sbfx w10, w9, #9, #1 -; CHECK-NEXT: strh w11, [sp, #22] -; CHECK-NEXT: sbfx w11, w9, #8, #1 -; CHECK-NEXT: strh w12, [sp, #20] -; CHECK-NEXT: sbfx w12, w9, #7, #1 -; CHECK-NEXT: strh w10, [sp, #18] -; CHECK-NEXT: sbfx w10, w9, #6, #1 -; CHECK-NEXT: strh w11, [sp, #16] -; CHECK-NEXT: sbfx w11, w9, #5, #1 -; CHECK-NEXT: strh w12, [sp, #14] -; CHECK-NEXT: sbfx w12, w9, #4, #1 -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: sbfx w10, w9, #3, #1 -; CHECK-NEXT: strh w11, [sp, #10] -; CHECK-NEXT: sbfx w11, w9, #2, #1 -; CHECK-NEXT: strh w12, [sp, #8] -; CHECK-NEXT: sbfx w12, w9, #1, #1 -; CHECK-NEXT: sbfx w9, w9, #0, #1 -; CHECK-NEXT: strh w10, [sp, #6] -; CHECK-NEXT: strh w11, [sp, #4] -; CHECK-NEXT: strh w12, [sp, #2] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] +; CHECK-NEXT: sbfx w9, w8, #15, #1 +; 
CHECK-NEXT: sbfx w10, w8, #14, #1 +; CHECK-NEXT: sbfx w11, w8, #13, #1 +; CHECK-NEXT: strh w9, [sp, #30] +; CHECK-NEXT: sbfx w9, w8, #12, #1 +; CHECK-NEXT: strh w10, [sp, #28] +; CHECK-NEXT: sbfx w10, w8, #11, #1 +; CHECK-NEXT: strh w11, [sp, #26] +; CHECK-NEXT: sbfx w11, w8, #10, #1 +; CHECK-NEXT: strh w9, [sp, #24] +; CHECK-NEXT: sbfx w9, w8, #9, #1 +; CHECK-NEXT: strh w10, [sp, #22] +; CHECK-NEXT: sbfx w10, w8, #8, #1 +; CHECK-NEXT: strh w11, [sp, #20] +; CHECK-NEXT: sbfx w11, w8, #7, #1 +; CHECK-NEXT: strh w9, [sp, #18] +; CHECK-NEXT: sbfx w9, w8, #6, #1 +; CHECK-NEXT: strh w10, [sp, #16] +; CHECK-NEXT: sbfx w10, w8, #5, #1 +; CHECK-NEXT: strh w11, [sp, #14] +; CHECK-NEXT: sbfx w11, w8, #4, #1 +; CHECK-NEXT: strh w9, [sp, #12] +; CHECK-NEXT: sbfx w9, w8, #3, #1 +; CHECK-NEXT: strh w10, [sp, #10] +; CHECK-NEXT: sbfx w10, w8, #2, #1 +; CHECK-NEXT: strh w11, [sp, #8] +; CHECK-NEXT: sbfx w11, w8, #1, #1 +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: strh w9, [sp, #6] +; CHECK-NEXT: strh w10, [sp, #4] +; CHECK-NEXT: strh w11, [sp, #2] +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] ; CHECK-NEXT: and z0.h, z0.h, #0x1 @@ -1235,75 +1230,74 @@ define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, <32 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldr w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldr w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 ; VBITS_GE_512-NEXT: ptrue p1.h -; VBITS_GE_512-NEXT: asr w10, w9, #31 -; VBITS_GE_512-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_512-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #62] -; VBITS_GE_512-NEXT: sbfx w10, w9, #28, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #60] -; VBITS_GE_512-NEXT: sbfx w11, w9, #27, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #58] -; 
VBITS_GE_512-NEXT: sbfx w12, w9, #26, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #56] -; VBITS_GE_512-NEXT: sbfx w10, w9, #25, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #54] -; VBITS_GE_512-NEXT: sbfx w11, w9, #24, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #52] -; VBITS_GE_512-NEXT: sbfx w12, w9, #23, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #50] -; VBITS_GE_512-NEXT: sbfx w10, w9, #22, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #48] -; VBITS_GE_512-NEXT: sbfx w11, w9, #21, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #46] -; VBITS_GE_512-NEXT: sbfx w12, w9, #20, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #44] -; VBITS_GE_512-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #42] -; VBITS_GE_512-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #40] -; VBITS_GE_512-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #38] -; VBITS_GE_512-NEXT: sbfx w10, w9, #16, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #36] -; VBITS_GE_512-NEXT: sbfx w11, w9, #15, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #34] -; VBITS_GE_512-NEXT: sbfx w12, w9, #14, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #32] -; VBITS_GE_512-NEXT: sbfx w10, w9, #13, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #30] -; VBITS_GE_512-NEXT: sbfx w11, w9, #12, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #28] -; VBITS_GE_512-NEXT: sbfx w12, w9, #11, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #26] -; VBITS_GE_512-NEXT: sbfx w10, w9, #10, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #24] -; VBITS_GE_512-NEXT: sbfx w11, w9, #9, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #22] -; VBITS_GE_512-NEXT: sbfx w12, w9, #8, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #20] -; VBITS_GE_512-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #18] -; VBITS_GE_512-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #16] -; VBITS_GE_512-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #14] -; VBITS_GE_512-NEXT: sbfx w10, w9, #4, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #12] -; VBITS_GE_512-NEXT: sbfx 
w11, w9, #3, #1 -; VBITS_GE_512-NEXT: strh w12, [sp, #10] -; VBITS_GE_512-NEXT: sbfx w12, w9, #2, #1 -; VBITS_GE_512-NEXT: strh w10, [sp, #8] -; VBITS_GE_512-NEXT: sbfx w10, w9, #1, #1 -; VBITS_GE_512-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_512-NEXT: strh w11, [sp, #6] -; VBITS_GE_512-NEXT: strh w12, [sp, #4] -; VBITS_GE_512-NEXT: strh w10, [sp, #2] -; VBITS_GE_512-NEXT: strh w9, [sp] -; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_512-NEXT: asr w9, w8, #31 +; VBITS_GE_512-NEXT: sbfx w10, w8, #30, #1 +; VBITS_GE_512-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #62] +; VBITS_GE_512-NEXT: sbfx w9, w8, #28, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #60] +; VBITS_GE_512-NEXT: sbfx w10, w8, #27, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #58] +; VBITS_GE_512-NEXT: sbfx w11, w8, #26, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #56] +; VBITS_GE_512-NEXT: sbfx w9, w8, #25, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #54] +; VBITS_GE_512-NEXT: sbfx w10, w8, #24, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #52] +; VBITS_GE_512-NEXT: sbfx w11, w8, #23, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #50] +; VBITS_GE_512-NEXT: sbfx w9, w8, #22, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #48] +; VBITS_GE_512-NEXT: sbfx w10, w8, #21, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #46] +; VBITS_GE_512-NEXT: sbfx w11, w8, #20, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #44] +; VBITS_GE_512-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #42] +; VBITS_GE_512-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #40] +; VBITS_GE_512-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #38] +; VBITS_GE_512-NEXT: sbfx w9, w8, #16, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #36] +; VBITS_GE_512-NEXT: sbfx w10, w8, #15, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #34] +; VBITS_GE_512-NEXT: sbfx w11, w8, #14, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #32] +; VBITS_GE_512-NEXT: sbfx w9, w8, #13, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #30] +; VBITS_GE_512-NEXT: sbfx w10, 
w8, #12, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #28] +; VBITS_GE_512-NEXT: sbfx w11, w8, #11, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #26] +; VBITS_GE_512-NEXT: sbfx w9, w8, #10, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #24] +; VBITS_GE_512-NEXT: sbfx w10, w8, #9, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #22] +; VBITS_GE_512-NEXT: sbfx w11, w8, #8, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #20] +; VBITS_GE_512-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #18] +; VBITS_GE_512-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #16] +; VBITS_GE_512-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #14] +; VBITS_GE_512-NEXT: sbfx w9, w8, #4, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #12] +; VBITS_GE_512-NEXT: sbfx w10, w8, #3, #1 +; VBITS_GE_512-NEXT: strh w11, [sp, #10] +; VBITS_GE_512-NEXT: sbfx w11, w8, #2, #1 +; VBITS_GE_512-NEXT: strh w9, [sp, #8] +; VBITS_GE_512-NEXT: sbfx w9, w8, #1, #1 +; VBITS_GE_512-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_512-NEXT: strh w10, [sp, #6] +; VBITS_GE_512-NEXT: strh w11, [sp, #4] +; VBITS_GE_512-NEXT: strh w9, [sp, #2] +; VBITS_GE_512-NEXT: strh w8, [sp] +; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1 @@ -1331,139 +1325,138 @@ define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, <64 x i1>* %c) #0 { ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldr x9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldr x8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.h, vl64 ; VBITS_GE_1024-NEXT: ptrue p1.h -; VBITS_GE_1024-NEXT: asr x10, x9, #63 -; VBITS_GE_1024-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #126] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #124] 
-; VBITS_GE_1024-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #122] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #120] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #118] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #116] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #114] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #112] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #110] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #108] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #106] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #104] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #102] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #100] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #98] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #96] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #94] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #92] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #90] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #88] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #86] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #84] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #82] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #80] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #37, #1 -; 
VBITS_GE_1024-NEXT: strh w10, [sp, #78] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #76] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #74] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #72] -; VBITS_GE_1024-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #70] -; VBITS_GE_1024-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #68] -; VBITS_GE_1024-NEXT: asr w12, w9, #31 -; VBITS_GE_1024-NEXT: strh w10, [sp, #66] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #64] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #62] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #60] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #58] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #56] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #54] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #52] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #50] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #48] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #46] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #44] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #42] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #40] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #38] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #36] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #34] -; VBITS_GE_1024-NEXT: sbfx 
w11, w9, #14, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #30] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #28] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #26] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #24] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #22] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #20] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #18] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #16] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #14] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #12] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: strh w11, [sp, #10] -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: strh w12, [sp, #8] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: strh w10, [sp, #6] -; VBITS_GE_1024-NEXT: strh w11, [sp, #4] -; VBITS_GE_1024-NEXT: strh w12, [sp, #2] -; VBITS_GE_1024-NEXT: strh w9, [sp] -; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_1024-NEXT: asr x9, x8, #63 +; VBITS_GE_1024-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #126] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #124] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #122] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #120] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #118] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_1024-NEXT: 
strh w11, [sp, #116] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #114] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #112] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #110] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #108] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #106] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #104] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #102] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #100] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #98] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #96] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #94] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #92] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #90] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #88] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #86] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #84] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #82] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #80] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #78] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #76] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #74] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #72] +; VBITS_GE_1024-NEXT: sbfx x9, x8, #33, #1 +; 
VBITS_GE_1024-NEXT: strh w10, [sp, #70] +; VBITS_GE_1024-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #68] +; VBITS_GE_1024-NEXT: asr w11, w8, #31 +; VBITS_GE_1024-NEXT: strh w9, [sp, #66] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #64] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #62] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #60] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #58] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #56] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #54] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #52] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #50] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #48] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #46] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #44] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #42] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #40] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #38] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #36] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #34] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #30] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #28] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #26] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #10, 
#1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #24] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #22] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #20] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #18] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #16] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #14] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #12] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: strh w10, [sp, #10] +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: strh w11, [sp, #8] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_1024-NEXT: strh w9, [sp, #6] +; VBITS_GE_1024-NEXT: strh w10, [sp, #4] +; VBITS_GE_1024-NEXT: strh w11, [sp, #2] +; VBITS_GE_1024-NEXT: strh w8, [sp] +; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.h, z0.h, #0x1 @@ -1491,268 +1484,267 @@ define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, <128 x i1>* %c) #0 ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr x9, [x2, #8] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr x8, [x2, #8] ; VBITS_GE_2048-NEXT: ptrue p0.h, vl128 ; VBITS_GE_2048-NEXT: ptrue p1.h -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #254] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #252] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #250] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; 
VBITS_GE_2048-NEXT: strh w10, [sp, #248] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #246] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #244] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #242] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #238] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #236] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #234] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #232] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #230] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #228] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #226] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #222] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #220] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #218] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #216] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #214] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #212] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #210] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #206] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #204] 
-; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #202] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #200] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #198] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #32, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #196] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strh w10, [sp, #194] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #190] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #188] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #186] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #184] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #182] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #180] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #178] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #174] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #172] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #170] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #168] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #166] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #164] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #162] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #160] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 
-; VBITS_GE_2048-NEXT: strh w10, [sp, #158] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #156] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #154] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #152] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #150] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #148] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #146] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #142] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #140] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #138] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #136] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #134] -; VBITS_GE_2048-NEXT: strh w11, [sp, #132] -; VBITS_GE_2048-NEXT: strh w12, [sp, #130] -; VBITS_GE_2048-NEXT: strh w9, [sp, #128] -; VBITS_GE_2048-NEXT: ldr x9, [x2] -; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #126] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #60, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #124] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #59, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #122] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #58, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #120] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #57, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #118] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #56, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #116] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #55, #1 -; VBITS_GE_2048-NEXT: 
strh w10, [sp, #114] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #54, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #53, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #110] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #52, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #108] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #106] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #104] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #102] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #48, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #100] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #47, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #98] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #46, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #45, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #94] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #44, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #92] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #43, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #90] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #42, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #88] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #41, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #86] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #40, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #84] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #82] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #78] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #36, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #76] -; VBITS_GE_2048-NEXT: sbfx x11, x9, #35, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #74] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #34, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #72] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #33, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #70] -; VBITS_GE_2048-NEXT: sbfx x11, 
x9, #32, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #68] -; VBITS_GE_2048-NEXT: asr w12, w9, #31 -; VBITS_GE_2048-NEXT: strh w10, [sp, #66] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #30, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #29, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #62] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #28, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #60] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #58] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #56] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #54] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #24, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #52] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #23, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #50] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #22, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #21, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #46] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #20, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #44] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #19, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #42] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #18, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #40] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #17, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #38] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #16, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #36] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #34] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #30] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #12, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #28] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #11, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #26] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #10, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #24] -; 
VBITS_GE_2048-NEXT: sbfx w10, w9, #9, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #22] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #8, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #20] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #7, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #18] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #6, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #16] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #5, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #14] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #4, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #12] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: strh w11, [sp, #10] -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: strh w12, [sp, #8] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: strh w10, [sp, #6] -; VBITS_GE_2048-NEXT: strh w11, [sp, #4] -; VBITS_GE_2048-NEXT: strh w12, [sp, #2] -; VBITS_GE_2048-NEXT: strh w9, [sp] -; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #254] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #252] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #250] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #248] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #246] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #244] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #242] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #238] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #236] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, 
#1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #234] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #232] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #230] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #228] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #226] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #222] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #220] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #218] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #216] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #214] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #212] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #210] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #206] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #204] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #202] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #200] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #198] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #196] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strh w9, [sp, #194] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #190] +; 
VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #188] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #186] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #184] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #182] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #180] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #178] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #174] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #172] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #170] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #168] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #166] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #164] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #162] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #158] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #156] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #154] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #152] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #150] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #148] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #146] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; 
VBITS_GE_2048-NEXT: strh w10, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #142] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #140] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #138] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #136] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #134] +; VBITS_GE_2048-NEXT: strh w10, [sp, #132] +; VBITS_GE_2048-NEXT: strh w11, [sp, #130] +; VBITS_GE_2048-NEXT: strh w8, [sp, #128] +; VBITS_GE_2048-NEXT: ldr x8, [x2] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #126] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #60, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #124] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #59, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #122] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #58, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #120] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #57, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #118] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #56, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #116] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #55, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #114] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #54, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #53, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #110] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #52, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #108] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #106] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #104] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #102] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #48, #1 +; VBITS_GE_2048-NEXT: strh w10, 
[sp, #100] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #47, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #98] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #46, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #45, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #94] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #44, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #92] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #43, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #90] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #42, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #88] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #41, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #86] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #40, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #84] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #82] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #78] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #36, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #76] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #35, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #74] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #34, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #72] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #33, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #70] +; VBITS_GE_2048-NEXT: sbfx x10, x8, #32, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #68] +; VBITS_GE_2048-NEXT: asr w11, w8, #31 +; VBITS_GE_2048-NEXT: strh w9, [sp, #66] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #30, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #64] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #29, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #62] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #28, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #60] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #58] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #56] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: 
strh w9, [sp, #54] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #24, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #52] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #23, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #50] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #22, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #21, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #46] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #20, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #44] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #19, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #42] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #18, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #40] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #17, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #38] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #16, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #36] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #34] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #30] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #12, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #28] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #11, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #26] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #10, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #24] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #9, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #22] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #8, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #20] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #7, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #18] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #6, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #16] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #5, #1 +; VBITS_GE_2048-NEXT: strh w11, [sp, #14] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #4, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #12] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: strh w10, [sp, #10] +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: 
strh w11, [sp, #8] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: strh w9, [sp, #6] +; VBITS_GE_2048-NEXT: strh w10, [sp, #4] +; VBITS_GE_2048-NEXT: strh w11, [sp, #2] +; VBITS_GE_2048-NEXT: strh w8, [sp] +; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1h { z2.h }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.h, z0.h, #0x1 @@ -1805,23 +1797,22 @@ define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, <8 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrb w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrb w8, [x2] ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: sbfx w10, w9, #7, #1 -; CHECK-NEXT: sbfx w11, w9, #6, #1 -; CHECK-NEXT: sbfx w12, w9, #5, #1 -; CHECK-NEXT: sbfx w13, w9, #4, #1 -; CHECK-NEXT: stp w11, w10, [sp, #24] -; CHECK-NEXT: sbfx w10, w9, #3, #1 -; CHECK-NEXT: sbfx w11, w9, #2, #1 -; CHECK-NEXT: stp w13, w12, [sp, #16] -; CHECK-NEXT: sbfx w12, w9, #1, #1 -; CHECK-NEXT: sbfx w9, w9, #0, #1 -; CHECK-NEXT: stp w11, w10, [sp, #8] -; CHECK-NEXT: stp w9, w12, [sp] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: sbfx w9, w8, #7, #1 +; CHECK-NEXT: sbfx w10, w8, #6, #1 +; CHECK-NEXT: sbfx w11, w8, #5, #1 +; CHECK-NEXT: sbfx w12, w8, #4, #1 +; CHECK-NEXT: stp w10, w9, [sp, #24] +; CHECK-NEXT: sbfx w9, w8, #3, #1 +; CHECK-NEXT: sbfx w10, w8, #2, #1 +; CHECK-NEXT: stp w12, w11, [sp, #16] +; CHECK-NEXT: sbfx w11, w8, #1, #1 +; CHECK-NEXT: sbfx w8, w8, #0, #1 +; CHECK-NEXT: stp w10, w9, [sp, #8] +; CHECK-NEXT: stp w8, w11, [sp] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] ; CHECK-NEXT: and z0.s, z0.s, #0x1 @@ -1849,35 +1840,34 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, <16 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: .cfi_def_cfa 
w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldrh w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldrh w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: ptrue p1.s -; VBITS_GE_512-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_512-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_512-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_512-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_512-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_512-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_512-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_512-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_512-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_512-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_512-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_512-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_512-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_512-NEXT: stp w9, w12, [sp] -; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_512-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_512-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_512-NEXT: sbfx w9, w8, #11, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_512-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_512-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #32] +; 
VBITS_GE_512-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_512-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_512-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_512-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_512-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_512-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_512-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_512-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_512-NEXT: stp w8, w11, [sp] +; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.s, z0.s, #0x1 @@ -1905,59 +1895,58 @@ define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, <32 x i1>* %c) #0 { ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldr w9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldr w8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 ; VBITS_GE_1024-NEXT: ptrue p1.s -; VBITS_GE_1024-NEXT: asr w10, w9, #31 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_1024-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #28, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #120] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #112] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #24, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #104] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #23, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #22, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #96] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #21, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #20, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #88] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #80] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_1024-NEXT: 
sbfx w13, w9, #16, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #72] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #64] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #5, #1 -; VBITS_GE_1024-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_1024-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_1024-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_1024-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_1024-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_1024-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_1024-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_1024-NEXT: stp w9, w12, [sp] -; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_1024-NEXT: asr w9, w8, #31 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #30, #1 +; VBITS_GE_1024-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #28, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #120] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #112] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #24, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #104] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #23, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #22, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #96] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #21, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #20, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, 
[sp, #88] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #80] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #16, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #72] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #64] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #11, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_1024-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_1024-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_1024-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_1024-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_1024-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_1024-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_1024-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_1024-NEXT: stp w8, w11, [sp] +; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.s, z0.s, #0x1 @@ -1985,107 +1974,106 @@ define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, <64 x i1>* %c) #0 { ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr x9, [x2] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr x8, [x2] ; VBITS_GE_2048-NEXT: ptrue p0.s, vl64 ; VBITS_GE_2048-NEXT: ptrue p1.s 
-; VBITS_GE_2048-NEXT: asr x10, x9, #63 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #62, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x9, #61, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #60, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #248] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #59, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #58, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #57, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #56, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #232] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #55, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #54, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #224] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #53, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #52, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #216] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #51, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #50, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #49, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #48, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #200] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #47, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #46, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #45, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #44, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #184] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #43, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #42, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #41, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #40, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #168] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #39, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #38, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #160] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #37, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #36, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #152] -; VBITS_GE_2048-NEXT: sbfx x10, x9, #35, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x9, #34, #1 -; VBITS_GE_2048-NEXT: stp 
w13, w12, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx x12, x9, #33, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x9, #32, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #136] -; VBITS_GE_2048-NEXT: asr w10, w9, #31 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #30, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #128] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #29, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #28, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #120] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #27, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #26, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #25, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #24, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #104] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #23, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #22, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #21, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #20, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #88] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #19, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #18, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #17, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #16, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #72] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #15, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #14, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #13, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #12, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #56] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #11, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #10, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #9, #1 -; VBITS_GE_2048-NEXT: sbfx w13, w9, #8, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #40] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #7, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #6, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #5, #1 -; 
VBITS_GE_2048-NEXT: sbfx w13, w9, #4, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #24] -; VBITS_GE_2048-NEXT: sbfx w10, w9, #3, #1 -; VBITS_GE_2048-NEXT: sbfx w11, w9, #2, #1 -; VBITS_GE_2048-NEXT: stp w13, w12, [sp, #16] -; VBITS_GE_2048-NEXT: sbfx w12, w9, #1, #1 -; VBITS_GE_2048-NEXT: sbfx w9, w9, #0, #1 -; VBITS_GE_2048-NEXT: stp w11, w10, [sp, #8] -; VBITS_GE_2048-NEXT: stp w9, w12, [sp] -; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x8] +; VBITS_GE_2048-NEXT: asr x9, x8, #63 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #62, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x8, #61, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #60, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #248] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #59, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #58, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #57, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #56, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #232] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #55, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #54, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #53, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #52, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #216] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #51, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #50, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #49, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #48, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #200] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #47, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #46, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #45, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #44, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #184] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #43, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #42, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #41, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #40, #1 +; 
VBITS_GE_2048-NEXT: stp w10, w9, [sp, #168] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #39, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #38, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #37, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #36, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #152] +; VBITS_GE_2048-NEXT: sbfx x9, x8, #35, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x8, #34, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx x11, x8, #33, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x8, #32, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #136] +; VBITS_GE_2048-NEXT: asr w9, w8, #31 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #30, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #128] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #29, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #28, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #120] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #27, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #26, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #25, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #24, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #104] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #23, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #22, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #21, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #20, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #88] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #19, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #18, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #17, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #16, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #72] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #15, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #14, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #64] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #13, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #12, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #56] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #11, #1 +; 
VBITS_GE_2048-NEXT: sbfx w10, w8, #10, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #9, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #8, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #40] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #7, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #6, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #5, #1 +; VBITS_GE_2048-NEXT: sbfx w12, w8, #4, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #24] +; VBITS_GE_2048-NEXT: sbfx w9, w8, #3, #1 +; VBITS_GE_2048-NEXT: sbfx w10, w8, #2, #1 +; VBITS_GE_2048-NEXT: stp w12, w11, [sp, #16] +; VBITS_GE_2048-NEXT: sbfx w11, w8, #1, #1 +; VBITS_GE_2048-NEXT: sbfx w8, w8, #0, #1 +; VBITS_GE_2048-NEXT: stp w10, w9, [sp, #8] +; VBITS_GE_2048-NEXT: stp w8, w11, [sp] +; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1w { z2.s }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.s, z0.s, #0x1 @@ -2139,20 +2127,19 @@ define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, <4 x i1>* %c) #0 { ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrb w9, [x2] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ldrb w8, [x2] ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: lsr w10, w9, #3 -; CHECK-NEXT: lsr w11, w9, #2 -; CHECK-NEXT: sbfx x12, x9, #0, #1 -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: sbfx x10, x10, #0, #1 -; CHECK-NEXT: sbfx x11, x11, #0, #1 +; CHECK-NEXT: lsr w9, w8, #3 +; CHECK-NEXT: lsr w10, w8, #2 +; CHECK-NEXT: sbfx x11, x8, #0, #1 +; CHECK-NEXT: lsr w8, w8, #1 ; CHECK-NEXT: sbfx x9, x9, #0, #1 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: stp x12, x9, [sp] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] +; CHECK-NEXT: sbfx x10, x10, #0, #1 +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: stp x10, x9, [sp, #16] +; CHECK-NEXT: stp x11, x8, [sp] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; 
CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] ; CHECK-NEXT: and z0.d, z0.d, #0x1 @@ -2180,30 +2167,29 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, <8 x i1>* %c) #0 { ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 -; VBITS_GE_512-NEXT: ldrb w9, [x2] -; VBITS_GE_512-NEXT: mov x8, sp +; VBITS_GE_512-NEXT: ldrb w8, [x2] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d -; VBITS_GE_512-NEXT: lsr w10, w9, #7 -; VBITS_GE_512-NEXT: lsr w11, w9, #6 -; VBITS_GE_512-NEXT: lsr w12, w9, #5 -; VBITS_GE_512-NEXT: lsr w13, w9, #4 +; VBITS_GE_512-NEXT: lsr w9, w8, #7 +; VBITS_GE_512-NEXT: lsr w10, w8, #6 +; VBITS_GE_512-NEXT: lsr w11, w8, #5 +; VBITS_GE_512-NEXT: lsr w12, w8, #4 +; VBITS_GE_512-NEXT: sbfx x9, x9, #0, #1 ; VBITS_GE_512-NEXT: sbfx x10, x10, #0, #1 ; VBITS_GE_512-NEXT: sbfx x11, x11, #0, #1 ; VBITS_GE_512-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_512-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_512-NEXT: lsr w14, w9, #3 -; VBITS_GE_512-NEXT: stp x11, x10, [sp, #48] -; VBITS_GE_512-NEXT: lsr w10, w9, #2 -; VBITS_GE_512-NEXT: stp x13, x12, [sp, #32] -; VBITS_GE_512-NEXT: sbfx x12, x9, #0, #1 -; VBITS_GE_512-NEXT: lsr w9, w9, #1 -; VBITS_GE_512-NEXT: sbfx x11, x14, #0, #1 -; VBITS_GE_512-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_512-NEXT: lsr w13, w8, #3 +; VBITS_GE_512-NEXT: stp x10, x9, [sp, #48] +; VBITS_GE_512-NEXT: lsr w9, w8, #2 +; VBITS_GE_512-NEXT: stp x12, x11, [sp, #32] +; VBITS_GE_512-NEXT: sbfx x11, x8, #0, #1 +; VBITS_GE_512-NEXT: lsr w8, w8, #1 +; VBITS_GE_512-NEXT: sbfx x10, x13, #0, #1 ; VBITS_GE_512-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_512-NEXT: stp x10, x11, [sp, #16] -; VBITS_GE_512-NEXT: stp x12, x9, [sp] -; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_512-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_512-NEXT: stp x9, x10, [sp, #16] +; VBITS_GE_512-NEXT: stp x11, x8, [sp] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, 
[sp] ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: and z0.d, z0.d, #0x1 @@ -2231,50 +2217,49 @@ define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, <16 x i1>* %c) #0 { ; VBITS_GE_1024-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_1024-NEXT: .cfi_offset w30, -8 ; VBITS_GE_1024-NEXT: .cfi_offset w29, -16 -; VBITS_GE_1024-NEXT: ldrh w9, [x2] -; VBITS_GE_1024-NEXT: mov x8, sp +; VBITS_GE_1024-NEXT: ldrh w8, [x2] ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d -; VBITS_GE_1024-NEXT: lsr w10, w9, #15 -; VBITS_GE_1024-NEXT: lsr w11, w9, #14 -; VBITS_GE_1024-NEXT: lsr w12, w9, #13 -; VBITS_GE_1024-NEXT: lsr w13, w9, #12 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: lsr w14, w9, #11 -; VBITS_GE_1024-NEXT: lsr w15, w9, #10 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #112] -; VBITS_GE_1024-NEXT: lsr w10, w9, #9 -; VBITS_GE_1024-NEXT: stp x13, x12, [sp, #96] -; VBITS_GE_1024-NEXT: lsr w13, w9, #8 -; VBITS_GE_1024-NEXT: sbfx x11, x14, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x15, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: lsr w14, w9, #3 -; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #80] -; VBITS_GE_1024-NEXT: lsr w11, w9, #6 -; VBITS_GE_1024-NEXT: stp x13, x10, [sp, #64] -; VBITS_GE_1024-NEXT: lsr w10, w9, #7 -; VBITS_GE_1024-NEXT: lsr w12, w9, #5 -; VBITS_GE_1024-NEXT: lsr w13, w9, #4 -; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #48] -; VBITS_GE_1024-NEXT: lsr w11, w9, #2 -; VBITS_GE_1024-NEXT: stp x13, x12, [sp, #32] -; VBITS_GE_1024-NEXT: sbfx x12, x9, #0, #1 -; VBITS_GE_1024-NEXT: lsr 
w9, w9, #1 -; VBITS_GE_1024-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: lsr w9, w8, #15 +; VBITS_GE_1024-NEXT: lsr w10, w8, #14 +; VBITS_GE_1024-NEXT: lsr w11, w8, #13 +; VBITS_GE_1024-NEXT: lsr w12, w8, #12 ; VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #16] -; VBITS_GE_1024-NEXT: stp x12, x9, [sp] -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: lsr w13, w8, #11 +; VBITS_GE_1024-NEXT: lsr w14, w8, #10 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #112] +; VBITS_GE_1024-NEXT: lsr w9, w8, #9 +; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #96] +; VBITS_GE_1024-NEXT: lsr w12, w8, #8 +; VBITS_GE_1024-NEXT: sbfx x10, x13, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x14, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: lsr w13, w8, #3 +; VBITS_GE_1024-NEXT: stp x11, x10, [sp, #80] +; VBITS_GE_1024-NEXT: lsr w10, w8, #6 +; VBITS_GE_1024-NEXT: stp x12, x9, [sp, #64] +; VBITS_GE_1024-NEXT: lsr w9, w8, #7 +; VBITS_GE_1024-NEXT: lsr w11, w8, #5 +; VBITS_GE_1024-NEXT: lsr w12, w8, #4 +; VBITS_GE_1024-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #48] +; VBITS_GE_1024-NEXT: lsr w10, w8, #2 +; VBITS_GE_1024-NEXT: stp x12, x11, [sp, #32] +; VBITS_GE_1024-NEXT: sbfx x11, x8, #0, #1 +; VBITS_GE_1024-NEXT: lsr w8, w8, #1 +; VBITS_GE_1024-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_1024-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_1024-NEXT: stp x10, x9, [sp, #16] +; VBITS_GE_1024-NEXT: stp x11, x8, [sp] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [sp] ; VBITS_GE_1024-NEXT: ld1d { z1.d }, p0/z, [x0] ; 
VBITS_GE_1024-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_1024-NEXT: and z0.d, z0.d, #0x1 @@ -2302,121 +2287,120 @@ define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, <32 x i1>* %c) #0 { ; VBITS_GE_2048-NEXT: .cfi_def_cfa w29, 16 ; VBITS_GE_2048-NEXT: .cfi_offset w30, -8 ; VBITS_GE_2048-NEXT: .cfi_offset w29, -16 -; VBITS_GE_2048-NEXT: ldr w9, [x2] -; VBITS_GE_2048-NEXT: mov x8, sp +; VBITS_GE_2048-NEXT: ldr w8, [x2] ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d -; VBITS_GE_2048-NEXT: ubfx x10, x9, #31, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #30, #2 +; VBITS_GE_2048-NEXT: ubfx x9, x8, #31, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #30, #2 +; VBITS_GE_2048-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #29, #3 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #28, #4 +; VBITS_GE_2048-NEXT: sbfx x9, x9, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #27, #5 +; VBITS_GE_2048-NEXT: ubfx x14, x8, #26, #6 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #240] +; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x12, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #25, #7 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #23, #9 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x11, x9, [sp, #224] +; VBITS_GE_2048-NEXT: sbfx x9, x14, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #24, #8 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 
killed $x11 def $x11 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #208] +; VBITS_GE_2048-NEXT: sbfx x9, x10, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #22, #10 +; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #21, #11 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x11, x9, [sp, #192] +; VBITS_GE_2048-NEXT: sbfx x9, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #20, #12 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #19, #13 ; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #29, #3 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #28, #4 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #176] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 ; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #18, #14 ; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 ; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #17, #15 ; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #27, #5 -; VBITS_GE_2048-NEXT: ubfx x15, x9, #26, #6 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: // kill: def $w15 killed $w15 killed $x15 def $x15 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #240] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x12, x13, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x13, x14, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #25, #7 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #23, #9 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x12, x10, [sp, #224] -; 
VBITS_GE_2048-NEXT: sbfx x10, x15, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #24, #8 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #160] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #16, #16 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #15, #17 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #208] -; VBITS_GE_2048-NEXT: sbfx x10, x11, #0, #1 +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #144] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #14, #18 ; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #22, #10 -; VBITS_GE_2048-NEXT: sbfx x13, x14, #0, #1 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #21, #11 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x12, x10, [sp, #192] -; VBITS_GE_2048-NEXT: sbfx x10, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #20, #12 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #19, #13 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #13, #19 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #128] +; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #12, #20 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #11, #21 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 ; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #176] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #112] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #10, 
#22 ; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #18, #14 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #9, #23 ; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #17, #15 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #160] +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #96] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #8, #24 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #7, #25 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #80] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #6, #26 +; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #5, #27 +; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #64] +; VBITS_GE_2048-NEXT: sbfx x9, x11, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x10, x8, #4, #28 +; VBITS_GE_2048-NEXT: ubfx x11, x8, #3, #29 +; VBITS_GE_2048-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; VBITS_GE_2048-NEXT: stp x9, x12, [sp, #48] +; VBITS_GE_2048-NEXT: sbfx x9, x13, #0, #1 +; VBITS_GE_2048-NEXT: sbfx x10, x10, #0, #1 +; VBITS_GE_2048-NEXT: ubfx x12, x8, #2, #30 +; VBITS_GE_2048-NEXT: ubfx x13, x8, #1, #31 +; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; VBITS_GE_2048-NEXT: sbfx x8, x8, #0, #1 +; VBITS_GE_2048-NEXT: // kill: def 
$w13 killed $w13 killed $x13 def $x13 +; VBITS_GE_2048-NEXT: stp x10, x9, [sp, #32] +; VBITS_GE_2048-NEXT: sbfx x9, x12, #0, #1 ; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #16, #16 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #15, #17 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: stp x10, x12, [sp, #144] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #14, #18 -; VBITS_GE_2048-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #13, #19 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #128] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #12, #20 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #11, #21 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #112] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #10, #22 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #9, #23 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #96] -; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #8, #24 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #7, #25 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: stp x10, x12, [sp, #80] -; VBITS_GE_2048-NEXT: sbfx x10, 
x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #6, #26 -; VBITS_GE_2048-NEXT: sbfx x13, x13, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #5, #27 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #64] -; VBITS_GE_2048-NEXT: sbfx x10, x12, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x11, x9, #4, #28 -; VBITS_GE_2048-NEXT: ubfx x12, x9, #3, #29 -; VBITS_GE_2048-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 -; VBITS_GE_2048-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 -; VBITS_GE_2048-NEXT: stp x10, x13, [sp, #48] -; VBITS_GE_2048-NEXT: sbfx x10, x14, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x11, #0, #1 -; VBITS_GE_2048-NEXT: ubfx x13, x9, #2, #30 -; VBITS_GE_2048-NEXT: ubfx x14, x9, #1, #31 -; VBITS_GE_2048-NEXT: sbfx x12, x12, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 -; VBITS_GE_2048-NEXT: sbfx x9, x9, #0, #1 -; VBITS_GE_2048-NEXT: // kill: def $w14 killed $w14 killed $x14 def $x14 -; VBITS_GE_2048-NEXT: stp x11, x10, [sp, #32] -; VBITS_GE_2048-NEXT: sbfx x10, x13, #0, #1 -; VBITS_GE_2048-NEXT: sbfx x11, x14, #0, #1 -; VBITS_GE_2048-NEXT: stp x10, x12, [sp, #16] -; VBITS_GE_2048-NEXT: stp x9, x11, [sp] -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x8] +; VBITS_GE_2048-NEXT: stp x9, x11, [sp, #16] +; VBITS_GE_2048-NEXT: stp x8, x10, [sp] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [sp] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_2048-NEXT: and z0.d, z0.d, #0x1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll index 55c45eebb039..7643da08ff71 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ 
-939,7 +939,6 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 { ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: mov z2.d, z1.d[1] @@ -947,7 +946,7 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 { ; CHECK-NEXT: mov z1.d, z0.d[3] ; CHECK-NEXT: mov z0.d, z0.d[2] ; CHECK-NEXT: stp d0, d1, [sp] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index 95ecf2582f76..9116a45224e1 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -506,18 +506,18 @@ define @test_predicate_insert_32xi1( %val, ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl] -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1b { z0.b }, p1, [sp] -; CHECK-NEXT: strb w0, [x10, x8] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK-NEXT: ld1b { z1.b }, p1/z, [x10, #1, mul vl] +; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: and z0.b, z0.b, #0x1 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 diff --git 
a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 1122fc2c85a8..68e34dcd2940 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -213,19 +213,18 @@ define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, * %psv, * %ou ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: str q0, [sp, #16] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp] -; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x1] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x1, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -319,11 +317,10 @@ define @insert_nxv8f16_nxv2f16( %vec, @insert_nxv6i32_nxv2i32( %sv0, , align 8 + %object = alloca , align 8 + ; Reads from %object at offset 7 * readsize + ret void + } + define void @testcase_negative_offset() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset -8 * readsize + ret void + } + + define void @testcase_positive_offset_out_of_range() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset 8 * readsize + ret void + } + define void @testcase_negative_offset_out_of_range() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset -9 * readsize + ret void + } +...
+--- +name: testcase_positive_offset +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, 7 :: 
(load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, 7 :: (load (s64) from %ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8) + renamable $z0 = 
LDNF1W_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... + +--- +name: testcase_negative_offset +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_negative_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = 
LDNF1H_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, -8 :: (load (s64) from %ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from 
%ir.object, align 2) + renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... + +--- +name: testcase_positive_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2 + ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1 + ; CHECK-NEXT: renamable 
$z0 = LDNF1B_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2 + ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2 + ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4 + ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 8 :: 
(load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... 
+ +--- +name: testcase_negative_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_negative_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2 + ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed 
$x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2 + ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2 + ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4 + ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from 
%ir.object, align 2) + renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... diff --git a/llvm/test/CodeGen/AArch64/sve-ldstnt1.mir b/llvm/test/CodeGen/AArch64/sve-ldstnt1.mir new file mode 100644 index 000000000000..a5701a670928 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ldstnt1.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that prologepilog works for each of the LDNT1/STNT1 instructions for stack-based objects. 
+# +--- | + define void @testcase_positive_offset() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset 7 * readsize + ret void + } + define void @testcase_negative_offset() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset -8 * readsize + ret void + } + + define void @testcase_positive_offset_out_of_range() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset 8 * readsize + ret void + } + define void @testcase_negative_offset_out_of_range() { + %dummy = alloca , align 8 + %object = alloca , align 8 + ; Reads from %object at offset -9 * readsize + ret void + } +... +--- +name: testcase_positive_offset +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8) + ; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, 7 :: (load (s64) from %ir.object) + ; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s8) into %ir.object, align 8) + ; CHECK-NEXT: 
STNT1H_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s16) into %ir.object, align 8) + ; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s32) into %ir.object, align 8) + ; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s64) into %ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8) + renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8) + STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 1 into %ir.object, align 8) + STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 2 into %ir.object, align 8) + STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 4 into %ir.object, align 8) + STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 8 into %ir.object, align 8) + RET_ReallyLR implicit $z0 +... 
+ +--- +name: testcase_negative_offset +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_negative_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, -8 :: (load (s32) from %ir.object) + ; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, -8 :: (load (s64) from %ir.object) + ; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s8) into %ir.object, align 8) + ; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s16) into %ir.object, align 8) + ; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s32) into %ir.object, align 8) + ; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s64) into %ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2) + 
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 4) + renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8) + STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 1 into %ir.object, align 8) + STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 2 into %ir.object, align 8) + STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 4 into %ir.object, align 8) + STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 8 into %ir.object, align 8) + RET_ReallyLR implicit $z0 +... + +--- +name: testcase_positive_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, 7 :: 
(load (s64) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s8) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s16) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s32) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1 + ; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s64) into %ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 4) + renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 8 :: (load 8 from %ir.object, align 8) + STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 1 into %ir.object, align 8) + STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 2 into %ir.object, align 8) + STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 4 into %ir.object, align 8) + STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 8 into %ir.object, align 8) + RET_ReallyLR implicit $z0 +... 
+ +--- +name: testcase_negative_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } + - { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_negative_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, -8 :: (load (s64) from %ir.object) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s8) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s16) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s32) into %ir.object, align 8) + ; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1 + ; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s64) into 
%ir.object) + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $z0 + renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2) + renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2) + renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 4) + renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -9 :: (load 8 from %ir.object, align 8) + STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 1 into %ir.object, align 8) + STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 2 into %ir.object, align 8) + STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 4 into %ir.object, align 8) + STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 8 into %ir.object, align 8) + RET_ReallyLR implicit $z0 +... 
diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll index 56d2ff25cb15..accbb533bd8f 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -26,14 +26,14 @@ define i8 @split_extract_32i8_idx( %a, i32 %idx) { ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: st1b { z1.b }, p0, [x10, #1, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: ldrb w0, [x10, x8] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldrb w0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -51,14 +51,14 @@ define i16 @split_extract_16i16_idx( %a, i32 %idx) { ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -77,13 +77,13 @@ define i32 @split_extract_8i32_idx( %a, i32 %idx) { ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x9, w0 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: st1w { z1.s }, p0, [sp, 
#1, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: ldr w0, [x10, x8, lsl #2] +; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -98,19 +98,19 @@ define i64 @split_extract_8i64_idx( %a, i32 %idx) { ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: cnth x9 +; CHECK-NEXT: cnth x8 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x10, w0 -; CHECK-NEXT: sub x9, x9, #1 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x10, x9 +; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] +; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -146,14 +146,14 @@ define i16 @split_extract_16i16( %a) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mov w10, #128 +; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul 
vl] -; CHECK-NEXT: cmp x8, #128 +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, #128 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -170,17 +170,17 @@ define i32 @split_extract_16i32( %a) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w10, #34464 -; CHECK-NEXT: movk w10, #1, lsl #16 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #34464 +; CHECK-NEXT: movk w9, #1, lsl #16 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl] -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -197,13 +197,13 @@ define i64 @split_extract_4i64( %a) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #10 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: mov w10, #10 -; CHECK-NEXT: cmp x8, #10 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] +; 
CHECK-NEXT: cmp x8, #10 +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll index 0b9baa23a11f..0465da7e7093 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -26,14 +26,14 @@ define @split_insert_32i8_idx( %a, i8 %elt, ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl] -; CHECK-NEXT: cmp x1, x8 +; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: cmp x1, x8 ; CHECK-NEXT: csel x8, x1, x8, lo ; CHECK-NEXT: strb w0, [x9, x8] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] +; CHECK-NEXT: ld1b { z1.b }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -53,12 +53,12 @@ define @split_insert_8f32_idx( %a, floa ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x0, x8 +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x0, x8, lo -; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str s2, [x9, x8, lsl #2] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -78,16 +78,16 @@ define @split_insert_8i64_idx( %a, i64 %elt ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x1, x8 +; CHECK-NEXT: st1d { z3.d 
}, p0, [sp, #3, mul vl] ; CHECK-NEXT: csel x8, x1, x8, lo -; CHECK-NEXT: st1d { z3.d }, p0, [x9, #3, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x9, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str x0, [x9, x8, lsl #3] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, #1, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9, #2, mul vl] -; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #2, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -136,21 +136,21 @@ define @split_insert_32i16( %a, i16 %elt) ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w10, #128 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl] ; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: st1h { z3.h }, p0, [x9, #3, mul vl] -; CHECK-NEXT: csel x8, x8, x10, lo -; CHECK-NEXT: st1h { z2.h }, p0, [x9, #2, mul vl] -; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: strh w0, [x9, x8, lsl #1] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x9, #1, mul vl] -; CHECK-NEXT: ld1h { z2.h }, p0/z, [x9, #2, mul vl] -; CHECK-NEXT: ld1h { z3.h }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, 
mul vl] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp, #2, mul vl] +; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -170,14 +170,14 @@ define @split_insert_8i32( %a, i32 %elt) { ; CHECK-NEXT: movk w9, #15, lsl #16 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: mov x10, sp -; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str w0, [x10, x8, lsl #2] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] +; CHECK-NEXT: str w0, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret