[SVE] Use reg+reg addressing mode for immediate offsets.

In the reg+imm SVE addressing mode, imm is implicitly scaled by VL, making it impractical for truly immediate offsets. However, if the offset can be unscaled based on the storage element type, we can use the reg+reg SVE addressing mode and thus either reduce the number of generated add instructions or replace them with a mov instruction that can be hoisted out of the hot code path. Differential Revision: https://reviews.llvm.org/D106744
2021-07-22 17:16:20 +01:00 · 2021-07-22 17:16:20 +01:00 · 3b77e2737c
parent 3ca6dea05d
commit 3b77e2737c
27 changed files with 712 additions and 772 deletions
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@ -5034,6 +5034,24 @@ bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
    return true;
  }

+  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
+    int64_t ImmOff = C->getSExtValue();
+    unsigned Size = 1 << Scale;
+
+    // To use the reg+reg addressing mode, the immediate must be a multiple of
+    // the vector element's byte size.
+    if (ImmOff % Size)
+      return false;
+
+    SDLoc DL(N);
+    Base = LHS;
+    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
+    SDValue Ops[] = {Offset};
+    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
+    Offset = SDValue(MI, 0);
+    return true;
+  }
+
  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@ -190,12 +190,11 @@ define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vsc
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-8
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -265,12 +264,11 @@ define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vsc
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-12
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-6
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -382,12 +380,11 @@ define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <v
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-4
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -711,12 +708,11 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-8
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -767,12 +763,11 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-4
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -823,12 +818,11 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -879,12 +873,11 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-8
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -935,12 +928,11 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-4
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -991,12 +983,11 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -1112,12 +1103,11 @@ define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8>
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    st1d { z1.d }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    mov x9, #-16
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@ -1133,14 +1123,13 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    mov x9, #-32
+; CHECK-NEXT:    mov x9, #-8
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    st1w { z3.s }, p0, [x8, #3, mul vl]
 ; CHECK-NEXT:    st1w { z2.s }, p0, [x8, #2, mul vl]
 ; CHECK-NEXT:    addvl x8, x8, #2
-; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
 ; CHECK-NEXT:    sub x8, x8, #32 // =32
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #4
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
@ -65,13 +65,13 @@ define void @ctlz_v64i8(<64 x i8>* %a) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %op)
@ -146,13 +146,13 @@ define void @ctlz_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %op)
@ -227,13 +227,13 @@ define void @ctlz_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %op)
@ -308,13 +308,13 @@ define void @ctlz_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %op)
@ -393,13 +393,13 @@ define void @ctpop_v64i8(<64 x i8>* %a) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
 ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %op)
@ -476,13 +476,13 @@ define void @ctpop_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %op)
@ -561,13 +561,13 @@ define void @ctpop_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %op)
@ -648,13 +648,13 @@ define void @ctpop_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %op)
@ -737,15 +737,15 @@ define void @cttz_v64i8(<64 x i8>* %a) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
 ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[RBIT_LO]].b
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[RBIT_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %op)
@ -826,15 +826,15 @@ define void @cttz_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[RBIT_LO]].h
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[RBIT_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %op)
@ -917,15 +917,15 @@ define void @cttz_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[RBIT_LO]].s
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[RBIT_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %op)
@ -1006,15 +1006,15 @@ define void @cttz_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[RBIT_LO]].d
 ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[RBIT_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %op)
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
@ -73,19 +73,17 @@ define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
-; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2]
-; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x2, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -178,19 +176,17 @@ define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c)

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
-; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2]
-; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x2, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -283,19 +279,17 @@ define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
-; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2]
-; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x2, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
@ -69,12 +69,11 @@ define void @fadd_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
 ; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
 ; CHECK-DAG: fadd [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
 ; CHECK-DAG: st1h { [[RES]].h }, [[PG]], [x0]
-; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_256-DAG: add x[[B1:[0-9]+]], x1, #[[#VBYTES]]
-; VBITS_LE_256-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x[[A1]]]
-; VBITS_LE_256-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x[[B1]]]
+; VBITS_LE_256-DAG: mov x[[IDX_1:[0-9]+]], #[[#div(VBYTES,2)]]
+; VBITS_LE_256-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_1]], lsl #1]
+; VBITS_LE_256-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_1]], lsl #1]
 ; VBITS_LE_256-DAG: fadd [[RES_1:z[0-9]+]].h, [[PG]]/m, [[OP1_1]].h, [[OP2_1]].h
-; VBITS_LE_256-DAG: st1h { [[RES_1]].h }, [[PG]], [x[[A1]]]
+; VBITS_LE_256-DAG: st1h { [[RES_1]].h }, [[PG]], [x0, x[[IDX_1]], lsl #1]
 ; CHECK: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -90,24 +89,21 @@ define void @fadd_v64f16(<64 x half>* %a, <64 x half>* %b) #0 {
 ; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
 ; CHECK-DAG: fadd [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h
 ; CHECK-DAG: st1h { [[RES]].h }, [[PG]], [x0]
-; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_512-DAG: add x[[B1:[0-9]+]], x1, #[[#VBYTES]]
-; VBITS_LE_512-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x[[A1]]]
-; VBITS_LE_512-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x[[B1]]]
+; VBITS_LE_512-DAG: mov x[[IDX_1:[0-9]+]], #[[#div(VBYTES,2)]]
+; VBITS_LE_512-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_1]], lsl #1]
+; VBITS_LE_512-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_1]], lsl #1]
 ; VBITS_LE_512-DAG: fadd [[RES_1:z[0-9]+]].h, [[PG]]/m, [[OP1_1]].h, [[OP2_1]].h
-; VBITS_LE_512-DAG: st1h { [[RES_1]].h }, [[PG]], [x[[A1]]]
-; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
-; VBITS_LE_256-DAG: add x[[B2:[0-9]+]], x1, #[[#mul(VBYTES,2)]]
-; VBITS_LE_256-DAG: ld1h { [[OP1_2:z[0-9]+]].h }, [[PG]]/z, [x[[A2]]]
-; VBITS_LE_256-DAG: ld1h { [[OP2_2:z[0-9]+]].h }, [[PG]]/z, [x[[B2]]]
+; VBITS_LE_512-DAG: st1h { [[RES_1]].h }, [[PG]], [x0, x[[IDX_1]], lsl #1]
+; VBITS_LE_256-DAG: mov x[[IDX_2:[0-9]+]], #[[#mul(div(VBYTES,2),2)]]
+; VBITS_LE_256-DAG: ld1h { [[OP1_2:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_2]], lsl #1]
+; VBITS_LE_256-DAG: ld1h { [[OP2_2:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_2]], lsl #1]
 ; VBITS_LE_256-DAG: fadd [[RES_2:z[0-9]+]].h, [[PG]]/m, [[OP1_2]].h, [[OP2_2]].h
-; VBITS_LE_256-DAG: st1h { [[RES_2]].h }, [[PG]], [x[[A2]]]
-; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
-; VBITS_LE_256-DAG: add x[[B3:[0-9]+]], x1, #[[#mul(VBYTES,3)]]
-; VBITS_LE_256-DAG: ld1h { [[OP1_3:z[0-9]+]].h }, [[PG]]/z, [x[[A3]]]
-; VBITS_LE_256-DAG: ld1h { [[OP2_3:z[0-9]+]].h }, [[PG]]/z, [x[[B3]]]
+; VBITS_LE_256-DAG: st1h { [[RES_2]].h }, [[PG]], [x0, x[[IDX_2]], lsl #1]
+; VBITS_LE_256-DAG: mov x[[IDX_3:[0-9]+]], #[[#mul(div(VBYTES,2),3)]]
+; VBITS_LE_256-DAG: ld1h { [[OP1_3:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_3]], lsl #1]
+; VBITS_LE_256-DAG: ld1h { [[OP2_3:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_3]], lsl #1]
 ; VBITS_LE_256-DAG: fadd [[RES_3:z[0-9]+]].h, [[PG]]/m, [[OP1_3]].h, [[OP2_3]].h
-; VBITS_LE_256-DAG: st1h { [[RES_3]].h }, [[PG]], [x[[A3]]]
+; VBITS_LE_256-DAG: st1h { [[RES_3]].h }, [[PG]], [x0, x[[IDX_3]], lsl #1]
 ; CHECK: ret
  %op1 = load <64 x half>, <64 x half>* %a
  %op2 = load <64 x half>, <64 x half>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
@ -72,13 +72,13 @@ define void @fcvt_v16f16_v16f32(<16 x half>* %a, <16 x float>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
 ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h
 ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x[[B_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
  %op1 = load <16 x half>, <16 x half>* %a
  %res = fpext <16 x half> %op1 to <16 x float>
  store <16 x float> %res, <16 x float>* %b
@ -168,7 +168,7 @@ define void @fcvt_v8f16_v8f64(<8 x half>* %a, <8 x double>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[OP]].16b, v[[OP]].16b, #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[OP]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
@ -177,7 +177,7 @@ define void @fcvt_v8f16_v8f64(<8 x half>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[UPK2_LO]].h
 ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[UPK2_HI]].h
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[B_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x half>, <8 x half>* %a
  %res = fpext <8 x half> %op1 to <8 x double>
  store <8 x double> %res, <8 x double>* %b
@ -268,13 +268,13 @@ define void @fcvt_v8f32_v8f64(<8 x float>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
 ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s
 ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x[[B_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x float>, <8 x float>* %a
  %res = fpext <8 x float> %op1 to <8 x double>
  store <8 x double> %res, <8 x double>* %b
@ -359,9 +359,9 @@ define void @fcvt_v16f32_v16f16(<16 x float>* %a, <16 x half>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
@ -460,9 +460,9 @@ define <8 x half> @fcvt_v8f64_v8f16(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: fcvt [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
@ -558,9 +558,9 @@ define void @fcvt_v8f64_v8f32(<8 x double>* %a, <8 x float>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll
@ -68,16 +68,15 @@ define void @fmaxnm_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -160,16 +159,15 @@ define void @fmaxnm_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -252,16 +250,15 @@ define void @fmaxnm_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
@ -348,16 +345,15 @@ define void @fminnm_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -440,16 +436,15 @@ define void @fminnm_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -532,16 +527,15 @@ define void @fminnm_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
@ -628,16 +622,15 @@ define void @fmax_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -720,16 +713,15 @@ define void @fmax_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -812,16 +804,15 @@ define void @fmax_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
@ -908,16 +899,15 @@ define void @fmin_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -1000,16 +990,15 @@ define void @fmin_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -1092,16 +1081,15 @@ define void @fmin_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
@ -64,9 +64,9 @@ define half @fadda_v32f16(half %start, <32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: fadda h0, [[PG]], h0, [[LO]].h
 ; VBITS_EQ_256-NEXT: fadda h0, [[PG]], h0, [[HI]].h
 ; VBITS_EQ_256-NEXT: ret
@ -137,9 +137,9 @@ define float @fadda_v16f32(float %start, <16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: fadda s0, [[PG]], s0, [[LO]].s
 ; VBITS_EQ_256-NEXT: fadda s0, [[PG]], s0, [[HI]].s
 ; VBITS_EQ_256-NEXT: ret
@ -210,9 +210,9 @@ define double @fadda_v8f64(double %start, <8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: fadda d0, [[PG]], d0, [[LO]].d
 ; VBITS_EQ_256-NEXT: fadda d0, [[PG]], d0, [[HI]].d
 ; VBITS_EQ_256-NEXT: ret
@ -291,10 +291,10 @@ define half @faddv_v32f16(half %start, <32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: faddv h1, [[PG]], [[ADD]].h
 ; VBITS_EQ_256-DAG: fadd h0, h0, [[RDX]]
 ; VBITS_EQ_256-NEXT: ret
@ -370,10 +370,10 @@ define float @faddv_v16f32(float %start, <16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: faddv [[RDX:s[0-9]+]], [[PG]], [[ADD]].s
 ; VBITS_EQ_256-DAG: fadd s0, s0, [[RDX]]
 ; VBITS_EQ_256-NEXT: ret
@ -447,10 +447,10 @@ define double @faddv_v8f64(double %start, <8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]
-; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: faddv [[RDX:d[0-9]+]], [[PG]], [[ADD]].d
 ; VBITS_EQ_256-DAG: fadd d0, d0, [[RDX]]
 ; VBITS_EQ_256-NEXT: ret
@ -525,10 +525,10 @@ define half @fmaxv_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: fmaxnmv h0, [[PG]], [[MAX]].h
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
@ -596,10 +596,10 @@ define float @fmaxv_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: fmaxnmv s0, [[PG]], [[MAX]].s
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
@ -667,10 +667,10 @@ define double @fmaxv_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: fmaxnmv d0, [[PG]], [[MAX]].d
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
@ -742,10 +742,10 @@ define half @fminv_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: fminnmv h0, [[PG]], [[MIN]].h
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
@ -813,10 +813,10 @@ define float @fminv_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: fminnmv s0, [[PG]], [[MIN]].s
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
@ -884,10 +884,10 @@ define double @fminv_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: fminnmv d0, [[PG]], [[MIN]].d
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@ -65,13 +65,13 @@ define void @frintp_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
@ -146,13 +146,13 @@ define void @frintp_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
@ -227,13 +227,13 @@ define void @frintp_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
@ -312,13 +312,13 @@ define void @frintm_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
@ -393,13 +393,13 @@ define void @frintm_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
@ -474,13 +474,13 @@ define void @frintm_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
@ -559,13 +559,13 @@ define void @frinti_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
@ -640,13 +640,13 @@ define void @frinti_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
@ -721,13 +721,13 @@ define void @frinti_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
@ -806,13 +806,13 @@ define void @frintx_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
@ -887,13 +887,13 @@ define void @frintx_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
@ -968,13 +968,13 @@ define void @frintx_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
@ -1053,13 +1053,13 @@ define void @frinta_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
@ -1134,13 +1134,13 @@ define void @frinta_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
@ -1215,13 +1215,13 @@ define void @frinta_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
@ -1300,13 +1300,13 @@ define void @frintn_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
@ -1381,13 +1381,13 @@ define void @frintn_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
@ -1462,13 +1462,13 @@ define void @frintn_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
@ -1547,13 +1547,13 @@ define void @frintz_v32f16(<32 x half>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
@ -1628,13 +1628,13 @@ define void @frintz_v16f32(<16 x float>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
@ -1709,13 +1709,13 @@ define void @frintz_v8f64(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@ -69,13 +69,13 @@ define void @fcvtzu_v32f16_v32i16(<32 x half>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %res = fptoui <32 x half> %op1 to <32 x i16>
@ -163,13 +163,13 @@ define void @fcvtzu_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
  %op1 = load <16 x half>, <16 x half>* %a
  %res = fptoui <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, <16 x i32>* %b
@ -260,7 +260,7 @@ define void @fcvtzu_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
@ -269,7 +269,7 @@ define void @fcvtzu_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].h
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].h
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x half>, <8 x half>* %a
  %res = fptoui <8 x half> %op1 to <8 x i64>
@ -363,9 +363,9 @@ define void @fcvtzu_v16f32_v16i16(<16 x float>* %a, <16 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].s
@ -459,14 +459,13 @@ define void @fcvtzu_v16f32_v16i32(<16 x float>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %res = fptoui <16 x float> %op1 to <16 x i32>
@ -555,13 +554,13 @@ define void @fcvtzu_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x float>, <8 x float>* %a
  %res = fptoui <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, <8 x i64>* %b
@ -651,9 +650,9 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_LO:z[0-9]+]].d, [[PG2]]/m, [[LO]].d
@ -752,9 +751,9 @@ define void @fcvtzu_v8f64_v8i32(<8 x double>* %a, <8 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d
@ -849,14 +848,13 @@ define void @fcvtzu_v8f64_v8i64(<8 x double>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %res = fptoui <8 x double> %op1 to <8 x i64>
@ -939,14 +937,13 @@ define void @fcvtzs_v32f16_v32i16(<32 x half>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %res = fptosi <32 x half> %op1 to <32 x i16>
@ -1034,13 +1031,13 @@ define void @fcvtzs_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
  %op1 = load <16 x half>, <16 x half>* %a
  %res = fptosi <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, <16 x i32>* %b
@ -1131,7 +1128,7 @@ define void @fcvtzs_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
@ -1140,7 +1137,7 @@ define void @fcvtzs_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].h
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].h
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x half>, <8 x half>* %a
  %res = fptosi <8 x half> %op1 to <8 x i64>
@ -1234,9 +1231,9 @@ define void @fcvtzs_v16f32_v16i16(<16 x float>* %a, <16 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].s
@ -1330,14 +1327,13 @@ define void @fcvtzs_v16f32_v16i32(<16 x float>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %res = fptosi <16 x float> %op1 to <16 x i32>
@ -1426,13 +1422,13 @@ define void @fcvtzs_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x float>, <8 x float>* %a
  %res = fptosi <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, <8 x i64>* %b
@ -1522,9 +1518,9 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(<8 x double>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_LO:z[0-9]+]].d, [[PG2]]/m, [[LO]].d
@ -1623,9 +1619,9 @@ define void @fcvtzs_v8f64_v8i32(<8 x double>* %a, <8 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d
@ -1720,14 +1716,13 @@ define void @fcvtzs_v8f64_v8i64(<8 x double>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %res = fptosi <8 x double> %op1 to <8 x i64>
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll
@ -73,17 +73,17 @@ define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[OFF_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].b, [[PG]]/z, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].b, [[PG]]/z, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].b, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].b, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: st1b { [[SEXT_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[OFF_HI]]]
+; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -176,18 +176,17 @@ define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -280,18 +279,17 @@ define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -384,18 +382,17 @@ define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
 ; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
@ -151,9 +151,9 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
 ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h
 ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[OUT_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x[[OUT_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %b = sext <16 x i8> %a to <16 x i32>
  store <16 x i32> %b, <16 x i32>* %out
@ -513,9 +513,9 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
 ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[OUT_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x[[OUT_HI]]]
+; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %b = zext <16 x i8> %a to <16 x i32>
  store <16 x i32> %b, <16 x i32>* %out
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
@ -68,15 +68,15 @@ define void @smax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -159,16 +159,15 @@ define void @smax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -251,16 +250,15 @@ define void @smax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -345,16 +343,15 @@ define void @smax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -441,15 +438,15 @@ define void @smin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
@ -531,16 +528,15 @@ define void @smin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -623,16 +619,15 @@ define void @smin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -717,16 +712,15 @@ define void @smin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -813,15 +807,15 @@ define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -904,16 +898,15 @@ define void @umax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -996,16 +989,15 @@ define void @umax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -1090,16 +1082,15 @@ define void @umax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -1186,15 +1177,15 @@ define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
  %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
@ -1276,16 +1267,15 @@ define void @umin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -1368,16 +1358,15 @@ define void @umin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -1462,16 +1451,15 @@ define void @umin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
@ -64,9 +64,9 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
 ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].b
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
@ -140,10 +140,10 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].h
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -216,10 +216,10 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].s
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -292,10 +292,10 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -372,9 +372,9 @@ define i8 @smaxv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
 ; VBITS_EQ_256-DAG: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -448,10 +448,10 @@ define i16 @smaxv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -524,10 +524,10 @@ define i32 @smaxv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -602,10 +602,10 @@ define i64 @smaxv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -682,9 +682,9 @@ define i8 @sminv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
 ; VBITS_EQ_256-DAG: sminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -758,10 +758,10 @@ define i16 @sminv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: sminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -834,10 +834,10 @@ define i32 @sminv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: sminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -912,10 +912,10 @@ define i64 @sminv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: sminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -992,9 +992,9 @@ define i8 @umaxv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
 ; VBITS_EQ_256-DAG: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -1068,10 +1068,10 @@ define i16 @umaxv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -1144,10 +1144,10 @@ define i32 @umaxv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -1222,10 +1222,10 @@ define i64 @umaxv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -1302,9 +1302,9 @@ define i8 @uminv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
 ; VBITS_EQ_256-DAG: uminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -1378,10 +1378,10 @@ define i16 @uminv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
+; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
 ; VBITS_EQ_256-DAG: uminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -1454,10 +1454,10 @@ define i32 @uminv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
+; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
 ; VBITS_EQ_256-DAG: uminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
@ -1532,10 +1532,10 @@ define i64 @uminv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
-; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
+; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
 ; VBITS_EQ_256-DAG: uminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll
@ -70,15 +70,15 @@ define void @ashr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -163,16 +163,15 @@ define void @ashr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -257,16 +256,15 @@ define void @ashr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -351,16 +349,15 @@ define void @ashr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -449,15 +446,15 @@ define void @lshr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -542,16 +539,15 @@ define void @lshr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -636,16 +632,15 @@ define void @lshr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -730,16 +725,15 @@ define void @lshr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -826,15 +820,15 @@ define void @shl_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b
 ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -917,16 +911,15 @@ define void @shl_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h
 ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -1009,16 +1002,15 @@ define void @shl_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s
 ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -1101,16 +1093,15 @@ define void @shl_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d
 ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
@ -69,14 +69,13 @@ define void @ucvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %res = uitofp <32 x i16> %op1 to <32 x half>
@ -165,13 +164,13 @@ define void @ucvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
  %op1 = load <16 x i16>, <16 x i16>* %a
  %res = uitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, <16 x float>* %b
@ -264,7 +263,7 @@ define void @ucvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
@ -273,7 +272,7 @@ define void @ucvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i16>, <8 x i16>* %a
  %res = uitofp <8 x i16> %op1 to <8 x double>
@ -363,9 +362,9 @@ define void @ucvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
@ -459,14 +458,13 @@ define void @ucvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %res = uitofp <16 x i32> %op1 to <16 x float>
@ -555,13 +553,13 @@ define void @ucvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x i32>, <8 x i32>* %a
  %res = uitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, <8 x double>* %b
@ -651,9 +649,9 @@ define <8 x half> @ucvtf_v8i64_v8f16(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
@ -752,9 +750,9 @@ define void @ucvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
@ -849,14 +847,13 @@ define void @ucvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %res = uitofp <8 x i64> %op1 to <8 x double>
@ -939,14 +936,13 @@ define void @scvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %res = sitofp <32 x i16> %op1 to <32 x half>
@ -1035,13 +1031,13 @@ define void @scvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
  %op1 = load <16 x i16>, <16 x i16>* %a
  %res = sitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, <16 x float>* %b
@ -1134,7 +1130,7 @@ define void @scvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
@ -1143,7 +1139,7 @@ define void @scvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i16>, <8 x i16>* %a
  %res = sitofp <8 x i16> %op1 to <8 x double>
@ -1233,9 +1229,9 @@ define void @scvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
@ -1329,14 +1325,13 @@ define void @scvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %res = sitofp <16 x i32> %op1 to <16 x float>
@ -1425,13 +1420,13 @@ define void @scvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
  %op1 = load <8 x i32>, <8 x i32>* %a
  %res = sitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, <8 x double>* %b
@ -1521,9 +1516,9 @@ define <8 x half> @scvtf_v8i64_v8f16(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
@ -1622,9 +1617,9 @@ define void @scvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
@ -1719,14 +1714,13 @@ define void @scvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %res = sitofp <8 x i64> %op1 to <8 x double>
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll
@ -56,8 +56,8 @@ define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
 ; CHECK-LABEL: load_v16f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
 ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
-; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]]
+; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
 ; CHECK: ret
  %load = load <16 x float>, <16 x float>* %a
  ret <16 x float> %load
@ -67,12 +67,12 @@ define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
 ; CHECK-LABEL: load_v32f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
 ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
-; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]]
-; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
-; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A2]]]
-; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
-; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A3]]]
+; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
+; VBITS_LE_256-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
+; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2]
+; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
+; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2]
 ; CHECK: ret
  %load = load <32 x float>, <32 x float>* %a
  ret <32 x float> %load
@ -82,20 +82,20 @@ define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
 ; CHECK-LABEL: load_v64f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
 ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
-; VBITS_LE_1024-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_1024-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]]
-; VBITS_LE_512-DAG:  add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
-; VBITS_LE_512-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A2]]]
-; VBITS_LE_512-DAG:  add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
-; VBITS_LE_512-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A3]]]
-; VBITS_LE_256-DAG:  add x[[A4:[0-9]+]], x0, #[[#mul(VBYTES,4)]]
-; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A4]]]
-; VBITS_LE_256-DAG:  add x[[A5:[0-9]+]], x0, #[[#mul(VBYTES,5)]]
-; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A5]]]
-; VBITS_LE_256-DAG:  add x[[A6:[0-9]+]], x0, #[[#mul(VBYTES,6)]]
-; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A6]]]
-; VBITS_LE_256-DAG:  add x[[A7:[0-9]+]], x0, #[[#mul(VBYTES,7)]]
-; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A7]]]
+; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_1024-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
+; VBITS_LE_512-DAG:  mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
+; VBITS_LE_512-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2]
+; VBITS_LE_512-DAG:  mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
+; VBITS_LE_512-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]]
+; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A4]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A5:[0-9]+]], #[[#mul(div(VBYTES,4),5)]]
+; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A5]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]]
+; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A6]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]]
+; VBITS_LE_256-DAG:  ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A7]], lsl #2]
 ; CHECK: ret
  %load = load <64 x float>, <64 x float>* %a
  ret <64 x float> %load
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
@ -68,9 +68,9 @@ define i8 @andv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: andv b[[REDUCE:[0-9]+]], [[PG]], [[AND]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -149,9 +149,9 @@ define i16 @andv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: andv h[[REDUCE:[0-9]+]], [[PG]], [[AND]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -229,9 +229,9 @@ define i32 @andv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: andv [[REDUCE:s[0-9]+]], [[PG]], [[AND]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
@ -307,9 +307,9 @@ define i64 @andv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: andv [[REDUCE:d[0-9]+]], [[PG]], [[AND]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
@ -391,9 +391,9 @@ define i8 @eorv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: eorv b[[REDUCE:[0-9]+]], [[PG]], [[EOR]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -472,9 +472,9 @@ define i16 @eorv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: eorv h[[REDUCE:[0-9]+]], [[PG]], [[EOR]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -552,9 +552,9 @@ define i32 @eorv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: eorv [[REDUCE:s[0-9]+]], [[PG]], [[EOR]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
@ -630,9 +630,9 @@ define i64 @eorv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: eorv [[REDUCE:d[0-9]+]], [[PG]], [[EOR]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
@ -714,9 +714,9 @@ define i8 @orv_v64i8(<64 x i8>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: orv b[[REDUCE:[0-9]+]], [[PG]], [[OR]].b
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -795,9 +795,9 @@ define i16 @orv_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: orv h[[REDUCE:[0-9]+]], [[PG]], [[OR]].h
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
@ -875,9 +875,9 @@ define i32 @orv_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: orv [[REDUCE:s[0-9]+]], [[PG]], [[OR]].s
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
@ -953,9 +953,9 @@ define i64 @orv_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 ; VBITS_EQ_256-DAG: orv [[REDUCE:d[0-9]+]], [[PG]], [[OR]].d
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@ -86,9 +86,9 @@ define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr d[[VALS:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0
 ; VBITS_EQ_256-DAG: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b
 ; VBITS_EQ_256-DAG: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b
@ -223,9 +223,9 @@ define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[VALS:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4
 ; VBITS_EQ_256-DAG: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0
 ; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[ZMSK]].h, #0
@ -347,9 +347,9 @@ define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].s, vl8
 ; VBITS_EQ_256-DAG: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0
 ; VBITS_EQ_256-DAG: mov x8, sp
 ; VBITS_EQ_256-DAG: mov [[MONE:z[0-9]+]].s, p1/z, #-1
@ -474,18 +474,17 @@ define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0
 ; VBITS_EQ_256-DAG: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0
 ; VBITS_EQ_256-DAG: ld1d { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d]
 ; VBITS_EQ_256-DAG: ld1d { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d]
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG0]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG0]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG0]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %cval = load <8 x i64>, <8 x i64>* %a
  %ptrs = load <8 x i64*>, <8 x i64*>* %b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@ -82,14 +82,14 @@ define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
 ; VBITS_EQ_256-DAG: ldr d[[VALS:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0
 ; VBITS_EQ_256-DAG: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b
 ; VBITS_EQ_256-DAG: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b
 ; VBITS_EQ_256-DAG: shl [[SHL_LO:v[0-9]+]].4h, [[VAL_LO]].4h, #8
 ; VBITS_EQ_256-DAG: shl [[SHL_HI:v[0-9]+]].4h, [[VAL_HI]].4h, #8
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: sshr v[[SSHR_LO:[0-9]+]].4h, [[SHL_LO]].4h, #8
 ; VBITS_EQ_256-DAG: sshr v[[SSHR_HI:[0-9]+]].4h, [[SHL_HI]].4h, #8
 ; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_LO]].h, #0
@ -208,9 +208,9 @@ define void @masked_scatter_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ldr q[[VALS:[0-9]+]], [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4
 ; VBITS_EQ_256-DAG: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0
 ; VBITS_EQ_256-DAG: ext v[[EXT:[0-9]+]].16b, v[[VALS]].16b, v[[VALS]].16b, #8
@ -321,9 +321,9 @@ define void @masked_scatter_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].s, vl8
 ; VBITS_EQ_256-DAG: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0]
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0
 ; VBITS_EQ_256-DAG: add x8, sp, #32
 ; VBITS_EQ_256-DAG: mov x9, sp
@ -439,12 +439,11 @@ define void @masked_scatter_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0
 ; VBITS_EQ_256-DAG: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0
 ; VBITS_EQ_256-DAG: st1d { [[VALS_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d]
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
@ -65,13 +65,13 @@ define void @bitreverse_v64i8(<64 x i8>* %a) #0 {
 ;
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]]
+; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
 ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
 ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]]
+; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op)
@ -146,13 +146,13 @@ define void @bitreverse_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op)
@ -227,13 +227,13 @@ define void @bitreverse_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op)
@ -308,13 +308,13 @@ define void @bitreverse_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op)
@ -393,13 +393,13 @@ define void @bswap_v32i16(<32 x i16>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op)
@ -474,13 +474,13 @@ define void @bswap_v16i32(<16 x i32>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op)
@ -555,13 +555,13 @@ define void @bswap_v8i64(<8 x i64>* %a) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op)
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
@ -66,9 +66,9 @@ define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[OFFSET_HI]]
+; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <64 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
@ -142,9 +142,9 @@ define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
@ -218,9 +218,9 @@ define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
@ -294,9 +294,9 @@ define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1]
-; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
@ -374,9 +374,9 @@ define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <32 x half> undef, half %a, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
@ -450,9 +450,9 @@ define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <16 x float> undef, float %a, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
@ -526,9 +526,9 @@ define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]]
+; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %insert = insertelement <8 x double> undef, double %a, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll
@ -56,8 +56,8 @@ define void @store_v16f32(<16 x float>* %a) #0 {
 ; CHECK-LABEL: store_v16f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
 ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
-; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
+; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
 ; CHECK: ret
  store <16 x float> zeroinitializer, <16 x float>* %a
  ret void
@ -67,12 +67,12 @@ define void @store_v32f32(<32 x float>* %a) #0 {
 ; CHECK-LABEL: store_v32f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
 ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
-; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
-; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
-; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]]
-; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
-; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]]
+; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
+; VBITS_LE_256-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
+; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2]
+; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
+; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2]
 ; CHECK: ret
  store <32 x float> zeroinitializer, <32 x float>* %a
  ret void
@ -82,20 +82,20 @@ define void @store_v64f32(<64 x float>* %a) #0 {
 ; CHECK-LABEL: store_v64f32:
 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
 ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
-; VBITS_LE_1024-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
-; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
-; VBITS_LE_512-DAG:  add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
-; VBITS_LE_512-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]]
-; VBITS_LE_512-DAG:  add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
-; VBITS_LE_512-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]]
-; VBITS_LE_256-DAG:  add x[[A4:[0-9]+]], x0, #[[#mul(VBYTES,4)]]
-; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A4]]]
-; VBITS_LE_256-DAG:  add x[[A5:[0-9]+]], x0, #[[#mul(VBYTES,5)]]
-; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A5]]]
-; VBITS_LE_256-DAG:  add x[[A6:[0-9]+]], x0, #[[#mul(VBYTES,6)]]
-; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A6]]]
-; VBITS_LE_256-DAG:  add x[[A7:[0-9]+]], x0, #[[#mul(VBYTES,7)]]
-; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A7]]]
+; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
+; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
+; VBITS_LE_512-DAG:  mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
+; VBITS_LE_512-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2]
+; VBITS_LE_512-DAG:  mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
+; VBITS_LE_512-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]]
+; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A4]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A5:[0-9]+]], #[[#mul(div(VBYTES,4),5)]]
+; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A5]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]]
+; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A6]], lsl #2]
+; VBITS_LE_256-DAG:  mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]]
+; VBITS_LE_256-DAG:  st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A7]], lsl #2]
 ; CHECK: ret
  store <64 x float> zeroinitializer, <64 x float>* %a
  ret void
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll
@ -53,9 +53,9 @@ define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
@ -103,9 +103,9 @@ define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
 ; Ensure sensible type legalisation.
 ; Currently does not use the truncating store
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
 ; VBITS_EQ_256-DAG: uzp1 z[[HALFS_LO:[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
@ -128,9 +128,9 @@ define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
@ -154,9 +154,9 @@ define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
 ; Ensure sensible type legalisation.
 ; Currently does not use the truncating store
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
 ; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h
 ; VBITS_EQ_256-DAG: uzp1 z[[BYTES_LO:[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b
@ -179,9 +179,9 @@ define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 ; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].h, vl8
 ; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
 ; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h
@ -204,9 +204,9 @@ define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {

 ; Ensure sensible type legalisation
 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 ; VBITS_EQ_256-DAG: ld1h { [[HALFS_LO:z[0-9]+]].h }, [[PG1]]/z, [x0]
-; VBITS_EQ_256-DAG: ld1h { [[HALFS_HI:z[0-9]+]].h }, [[PG1]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[HALFS_HI:z[0-9]+]].h }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].b, vl16
 ; VBITS_EQ_256-DAG: uzp1 [[BYTES_LO:z[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b
 ; VBITS_EQ_256-DAG: uzp1 [[BYTES_HI:z[0-9]+]].b, [[HALFS_HI]].b, [[HALFS_HI]].b
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
@ -72,10 +72,10 @@ define void @shuffle_ext_byone_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
-; VBITS_EQ_256-DAG: mov w8, #32
-; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x8]
+; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
+; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x8]
+; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].b, [[OP1_HI]].b[31]
 ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].b, [[OP2_LO]].b[31]
@ -83,7 +83,7 @@ define void @shuffle_ext_byone_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
 ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].b, [[TMP2]]
 ; VBITS_EQ_256-DAG: st1b { [[OP2_LO]].b }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1b { [[OP2_HI]].b }, [[PG]], [x0, x8]
+; VBITS_EQ_256-DAG: st1b { [[OP2_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <64 x i8>, <64 x i8>* %a
  %op2 = load <64 x i8>, <64 x i8>* %b
@ -230,11 +230,10 @@ define void @shuffle_ext_byone_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].h, [[OP1_HI]].h[15]
 ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].h, [[OP2_LO]].h[15]
@ -242,7 +241,7 @@ define void @shuffle_ext_byone_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
 ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].h, [[TMP2]]
 ; VBITS_EQ_256-DAG: st1h { [[OP2_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x i16>, <32 x i16>* %a
  %op2 = load <32 x i16>, <32 x i16>* %b
@ -360,11 +359,10 @@ define void @shuffle_ext_byone_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].s, [[OP1_HI]].s[7]
 ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].s, [[OP2_LO]].s[7]
@ -372,7 +370,7 @@ define void @shuffle_ext_byone_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
 ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].s, [[TMP2]]
 ; VBITS_EQ_256-DAG: st1w { [[OP2_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ret

  %op1 = load <16 x i32>, <16 x i32>* %a
@ -468,11 +466,10 @@ define void @shuffle_ext_byone_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3]
 ; VBITS_EQ_256-DAG: fmov [[TMP1:x[0-9]+]], d[[ELEM1]]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].d, [[OP2_LO]].d[3]
@ -480,7 +477,7 @@ define void @shuffle_ext_byone_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
 ; VBITS_EQ_256-DAG: fmov [[TMP2:x[0-9]+]], d[[ELEM2]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].d, [[TMP2]]
 ; VBITS_EQ_256-DAG: st1d { [[OP2_LO]].d }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x i64>, <8 x i64>* %a
  %op2 = load <8 x i64>, <8 x i64>* %b
@ -576,17 +573,16 @@ define void @shuffle_ext_byone_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].h, [[OP2_LO]].h[15]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].h, [[OP1_HI]].h[15]
 ; VBITS_EQ_256-DAG: insr [[OP2_LO]].h, h[[ELEM1]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].h, h[[ELEM2]]
 ; VBITS_EQ_256-DAG: st1h { [[OP2_LO]].h }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %op2 = load <32 x half>, <32 x half>* %b
@ -702,17 +698,16 @@ define void @shuffle_ext_byone_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].s, [[OP2_LO]].s[7]
 ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].s, [[OP1_HI]].s[7]
 ; VBITS_EQ_256-DAG: insr [[OP2_LO]].s, s[[ELEM1]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].s, s[[ELEM2]]
 ; VBITS_EQ_256-DAG: st1w { [[OP2_LO]].s }, [[PG]], [x0]
-; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x8]
+; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %op2 = load <16 x float>, <16 x float>* %b
@ -805,17 +800,16 @@ define void @shuffle_ext_byone_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {

 ; Ensure sensible type legalisation.
 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
-; VBITS_EQ_256-DAG: add x8, x0, #32
-; VBITS_EQ_256-DAG: add x9, x1, #32
-; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x8]
-; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x9]
+; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
-; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].d, [[OP2_LO]].d[3]
+; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3]
 ; VBITS_EQ_256-DAG: insr [[OP2_LO]].d, d[[ELEM1]]
 ; VBITS_EQ_256-DAG: insr [[OP2_HI]].d, d[[ELEM2]]
-; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x8]
 ; VBITS_EQ_256-DAG: st1d { [[OP2_LO]].d }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 ; VBITS_EQ_256-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %op2 = load <8 x double>, <8 x double>* %b
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
@ -104,8 +104,8 @@ define <vscale x 16 x i8> @ld1rob_i8_lower_bound(<vscale x 16 x i1> %pg, i8* %a)
 ; below lower bound
 define <vscale x 8 x i16> @ld1roh_i16_below_lower_bound(<vscale x 8 x i1> %pg, i16* %a) nounwind {
 ; CHECK-LABEL: ld1roh_i16_below_lower_bound:
-; CHECK-NEXT:  sub     x[[BASE:[0-9]+]], x0, #258
-; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x[[BASE]]]
+; CHECK-NEXT:  mov     x[[IDX:[0-9]+]], #-129
+; CHECK-NEXT:  ld1roh { z0.h }, p0/z, [x0, x[[IDX]], lsl #1]
 ; CHECK-NEXT:  ret
  %base = getelementptr i16, i16* %a, i64 -129
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
@ -125,8 +125,8 @@ define <vscale x 16 x i8> @ld1rob_i8_below_lower_bound_01(<vscale x 16 x i1> %pg
 ; not a multiple of 32
 define<vscale x 4 x i32> @ld1row_i32_not_multiple(<vscale x 4 x i1> %pg, i32* %a) nounwind {
 ; CHECK-LABEL: ld1row_i32_not_multiple:
-; CHECK-NEXT:  add x[[BASE:[0-9]+]], x0, #12
-; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x[[BASE]]]
+; CHECK-NEXT:  mov x[[IDX:[0-9]+]], #3
+; CHECK-NEXT:  ld1row { z0.s }, p0/z, [x0, x[[IDX]], lsl #2]
 ; CHECK-NEXT:  ret
  %base = getelementptr i32, i32* %a, i64 3
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
--- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@ -62,17 +62,16 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
 define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 {
 ; CHECK-LABEL: func_vscale2_2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #8
 ; CHECK-NEXT:    ptrue p0.s, vl8
-; CHECK-NEXT:    add x8, x0, #32 // =32
-; CHECK-NEXT:    add x9, x1, #32 // =32
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8]
-; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
-; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; CHECK-NEXT:    add z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    add z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
-; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b
@ -86,17 +85,16 @@ attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
 define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 {
 ; CHECK-LABEL: func_vscale2_4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #8
 ; CHECK-NEXT:    ptrue p0.s, vl8
-; CHECK-NEXT:    add x8, x0, #32 // =32
-; CHECK-NEXT:    add x9, x1, #32 // =32
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8]
-; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
-; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; CHECK-NEXT:    add z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    add z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
-; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, <16 x i32>* %a
  %op2 = load <16 x i32>, <16 x i32>* %b