From 51497dc0b20179eb145820cb8f651f026960de08 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 17 Dec 2021 09:39:21 +0000 Subject: [PATCH] [IR] Change vector.splice intrinsic to reject out-of-bounds indices I've changed the definition of the experimental.vector.splice instrinsic to reject indices that are known to be or possibly out-of-bounds. In practice, this means changing the definition so that the index is now only valid in the range [-VL, VL-1] where VL is the known minimum vector length. We use the vscale_range attribute to take the minimum vscale value into account so that we can permit more indices when the attribute is present. The splice intrinsic is currently only ever generated by the vectoriser, which will never attempt to splice vectors with out-of-bounds values. Changing the definition also makes things simpler for codegen since we can always assume that the index is valid. This patch was created in response to review comments on D115863 Differential Revision: https://reviews.llvm.org/D115933 --- llvm/docs/LangRef.rst | 9 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 - llvm/lib/IR/Verifier.cpp | 18 + .../AArch64/named-vector-shuffles-neon.ll | 18 - .../AArch64/named-vector-shuffles-sve.ll | 308 ++---------------- llvm/test/Verifier/invalid-splice.ll | 37 +++ 6 files changed, 89 insertions(+), 307 deletions(-) create mode 100644 llvm/test/Verifier/invalid-splice.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d8fd7da7ce77..342b79bcc703 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17220,10 +17220,11 @@ For example: Arguments: """""""""" -The first two operands are vectors with the same type. The third argument -``imm`` is the start index, modulo VL, where VL is the runtime vector length of -the source/result vector. The ``imm`` is a signed integer constant in the range -``-VL <= imm < VL``. For values outside of this range the result is poison. +The first two operands are vectors with the same type. 
The start index is imm +modulo the runtime number of elements in the source vector. For a fixed-width +vector , imm is a signed integer constant in the range +-N <= imm < N. For a scalable vector , imm is a signed +integer constant in the range -X <= imm < X where X=vscale_range_min * N. '``llvm.experimental.stepvector``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ff482f3a3333..9674a88bae95 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -11245,12 +11245,6 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) { unsigned NumElts = VT.getVectorNumElements(); - if ((-Imm > NumElts) || (Imm >= NumElts)) { - // Result is undefined if immediate is out-of-bounds. - setValue(&I, DAG.getUNDEF(VT)); - return; - } - uint64_t Idx = (NumElts + Imm) % NumElts; // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors. 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0e031051f096..fecbfc7a3ab7 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5352,6 +5352,24 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; } + case Intrinsic::experimental_vector_splice: { + VectorType *VecTy = cast(Call.getType()); + int64_t Idx = cast(Call.getArgOperand(2))->getSExtValue(); + int64_t KnownMinNumElements = VecTy->getElementCount().getKnownMinValue(); + if (Call.getParent() && Call.getParent()->getParent()) { + AttributeList Attrs = Call.getParent()->getParent()->getAttributes(); + if (Attrs.hasFnAttr(Attribute::VScaleRange)) + KnownMinNumElements *= Attrs.getFnAttrs().getVScaleRangeMin(); + } + Assert((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) || + (Idx >= 0 && Idx < KnownMinNumElements), + "The splice index exceeds the range [-VL, VL-1] where VL is the " + "known minimum number of elements in the vector. For scalable " + "vectors the minimum number of elements is determined from " + "vscale_range.", + &Call); + break; + } case Intrinsic::experimental_stepvector: { VectorType *VecTy = dyn_cast(Call.getType()); Assert(VecTy && VecTy->getScalarType()->isIntegerTy() && diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll index 822933653b93..38bd6447ac7c 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -62,15 +62,6 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 { ret <16 x float> %res } -; Verify out-of-bounds index results in undef vector. 
-define <2 x double> @splice_v2f64_idx_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: splice_v2f64_idx_out_of_bounds: -; CHECK: // %bb.0: -; CHECK-NEXT: ret - %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2) - ret <2 x double> %res -} - ; ; VECTOR_SPLICE (trailing elements) ; @@ -130,15 +121,6 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 { ret <16 x float> %res } -; Verify out-of-bounds trailing element count results in undef vector. -define <2 x double> @splice_v2f64_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: splice_v2f64_out_of_bounds: -; CHECK: // %bb.0: -; CHECK-NEXT: ret - %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3) - ret <2 x double> %res -} - declare <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8>, <2 x i8>, i32) declare <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8>, <16 x i8>, i32) declare <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll index 91b2281b167e..a03d8e0048f9 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -24,7 +24,7 @@ define @splice_nxv16i8_first_idx( %a, %res } -define @splice_nxv16i8_last_idx( %a, %b) #0 { +define @splice_nxv16i8_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv16i8_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #255 @@ -33,29 +33,6 @@ define @splice_nxv16i8_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv16i8_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i8_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #256 -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] -; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: cmp x8, #256 -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv16i8( %a, %b, i32 256) - ret %res -} - define @splice_nxv8i16_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i16_first_idx: ; CHECK: // %bb.0: @@ -65,38 +42,6 @@ define @splice_nxv8i16_first_idx( %a, %res } -define @splice_nxv8i16_last_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_last_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: ext z0.b, z0.b, z1.b, #254 -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 127) - ret %res -} - -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv8i16_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #128 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 128) - ret %res -} - define @splice_nxv4i32_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i32_first_idx: ; CHECK: // %bb.0: @@ -106,7 +51,7 @@ define @splice_nxv4i32_first_idx( %a, %res } -define @splice_nxv4i32_last_idx( %a, %b) #0 { +define @splice_nxv4i32_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv4i32_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252 @@ -115,29 +60,6 @@ define @splice_nxv4i32_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv4i32_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i32_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #64 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv4i32( %a, %b, i32 64) - ret %res -} - define @splice_nxv2i64_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i64_first_idx: ; CHECK: // %bb.0: @@ -147,7 +69,7 @@ define @splice_nxv2i64_first_idx( %a, %res } -define @splice_nxv2i64_last_idx( %a, %b) #0 { +define @splice_nxv2i64_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv2i64_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248 @@ -156,29 +78,6 @@ define @splice_nxv2i64_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv2i64_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i64_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #32 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmp x8, #32 -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv2i64( %a, %b, i32 32) - ret %res -} - define @splice_nxv2f16_neg_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2f16_neg_idx: ; CHECK: // %bb.0: @@ -219,7 +118,7 @@ define @splice_nxv2f16_first_idx( %a, %res } -define @splice_nxv2f16_last_idx( %a, %b) #0 { +define @splice_nxv2f16_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv2f16_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248 @@ -228,31 +127,6 @@ define @splice_nxv2f16_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv2f16_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f16_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #32 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmp x8, #32 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv2f16( %a, %b, i32 32) - ret %res -} - define @splice_nxv4f16_neg_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4f16_neg_idx: ; CHECK: // %bb.0: @@ -293,7 +167,7 @@ define @splice_nxv4f16_first_idx( %a, %res } -define @splice_nxv4f16_last_idx( %a, %b) #0 { +define @splice_nxv4f16_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv4f16_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252 @@ -302,31 +176,6 @@ define @splice_nxv4f16_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv4f16_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f16_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #64 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv4f16( %a, %b, i32 64) - ret %res -} - define @splice_nxv8f16_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8f16_first_idx: ; CHECK: // %bb.0: @@ -336,7 +185,7 @@ define @splice_nxv8f16_first_idx( %a, %res } -define @splice_nxv8f16_last_idx( %a, %b) #0 { +define @splice_nxv8f16_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv8f16_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #254 @@ -345,29 +194,6 @@ define @splice_nxv8f16_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv8f16_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f16_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #128 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv8f16( %a, %b, i32 128) - ret %res -} - define @splice_nxv2f32_neg_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2f32_neg_idx: ; CHECK: // %bb.0: @@ -408,7 +234,7 @@ define @splice_nxv2f32_first_idx( %a, < ret %res } -define @splice_nxv2f32_last_idx( %a, %b) #0 { +define @splice_nxv2f32_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv2f32_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248 @@ -417,31 +243,6 @@ define @splice_nxv2f32_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv2f32_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f32_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #32 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmp x8, #32 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv2f32( %a, %b, i32 32) - ret %res -} - define @splice_nxv4f32_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4f32_first_idx: ; CHECK: // %bb.0: @@ -451,7 +252,7 @@ define @splice_nxv4f32_first_idx( %a, < ret %res } -define @splice_nxv4f32_last_idx( %a, %b) #0 { +define @splice_nxv4f32_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv4f32_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252 @@ -460,29 +261,6 @@ define @splice_nxv4f32_last_idx( %a, %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv4f32_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f32_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #64 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv4f32( %a, %b, i32 64) - ret %res -} - define @splice_nxv2f64_first_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2f64_first_idx: ; CHECK: // %bb.0: @@ -492,7 +270,7 @@ define @splice_nxv2f64_first_idx( %a, ret %res } -define @splice_nxv2f64_last_idx( %a, %b) #0 { +define @splice_nxv2f64_last_idx( %a, %b) #1 { ; CHECK-LABEL: splice_nxv2f64_last_idx: ; CHECK: // %bb.0: ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248 @@ -501,29 +279,6 @@ define @splice_nxv2f64_last_idx( %a, ret %res } -; Ensure index is clamped when we cannot prove it's less than 2048-bit. -define @splice_nxv2f64_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f64_clamped_idx: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #32 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmp x8, #32 -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 32) - ret %res -} - ; Ensure predicate based splice is promoted to use ZPRs. 
define @splice_nxv2i1_idx( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_idx: @@ -617,8 +372,8 @@ define @splice_nxv8i32_idx( %a, @splice_nxv16f32_clamped_idx( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f32_clamped_idx: +define @splice_nxv16f32_16( %a, %b) #2 { +; CHECK-LABEL: splice_nxv16f32_16: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 @@ -684,9 +439,8 @@ define @splice_nxv16i8_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv16i8_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i8_clamped: +define @splice_nxv16i8_neg17( %a, %b) #2 { +; CHECK-LABEL: splice_nxv16i8_neg17: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -739,9 +493,8 @@ define @splice_nxv8i16_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv8i16_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_clamped: +define @splice_nxv8i16_neg9( %a, %b) #2 { +; CHECK-LABEL: splice_nxv8i16_neg9: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -794,9 +547,8 @@ define @splice_nxv4i32_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv4i32_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i32_clamped: +define @splice_nxv4i32_neg5( %a, %b) #2 { +; CHECK-LABEL: splice_nxv4i32_neg5: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -849,9 +601,8 @@ define @splice_nxv2i64_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. 
-define @splice_nxv2i64_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i64_clamped: +define @splice_nxv2i64_neg3( %a, %b) #2 { +; CHECK-LABEL: splice_nxv2i64_neg3: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -904,9 +655,8 @@ define @splice_nxv8f16_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv8f16_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f16_clamped: +define @splice_nxv8f16_neg9( %a, %b) #2 { +; CHECK-LABEL: splice_nxv8f16_neg9: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -959,9 +709,8 @@ define @splice_nxv4f32_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv4f32_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f32_clamped: +define @splice_nxv4f32_neg5( %a, %b) #2 { +; CHECK-LABEL: splice_nxv4f32_neg5: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -1014,9 +763,8 @@ define @splice_nxv2f64_1( %a, %res } -; Ensure number of trailing elements is clamped when we cannot prove it's less than VL. -define @splice_nxv2f64_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f64_clamped: +define @splice_nxv2f64_neg3( %a, %b) #2 { +; CHECK-LABEL: splice_nxv2f64_neg3: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 @@ -1147,8 +895,8 @@ define @splice_nxv8i32( %a, @splice_nxv16f32_clamped( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f32_clamped: +define @splice_nxv16f32_neg17( %a, %b) #2 { +; CHECK-LABEL: splice_nxv16f32_neg17: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 @@ -1198,3 +946,5 @@ declare @llvm.experimental.vector.splice.nxv16f32( @llvm.experimental.vector.splice.nxv2f64(, , i32) attributes #0 = { nounwind "target-features"="+sve" } +attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) } +attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) } diff --git a/llvm/test/Verifier/invalid-splice.ll b/llvm/test/Verifier/invalid-splice.ll new file mode 100644 index 000000000000..8c0c569edc7a --- /dev/null +++ b/llvm/test/Verifier/invalid-splice.ll @@ -0,0 +1,37 @@ +; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s + +; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector +define <2 x double> @splice_v2f64_idx_neg3(<2 x double> %a, <2 x double> %b) #0 { + %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3) + ret <2 x double> %res +} + +; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector +define @splice_nxv2f64_idx_neg3_vscale_min1( %a, %b) #0 { + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 -3) + ret %res +} + +; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector +define @splice_nxv2f64_idx_neg5_vscale_min2( %a, %b) #1 { + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 -5) + ret %res +} + +; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector +define <2 x double> @splice_v2f64_idx2(<2 x double> %a, <2 x double> %b) #0 { + %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2) + ret <2 x double> %res +} + +; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector +define 
<2 x double> @splice_v2f64_idx3(<2 x double> %a, <2 x double> %b) #1 {
+  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 4)
+  ret <2 x double> %res
+}
+
+attributes #0 = { vscale_range(1,16) }
+attributes #1 = { vscale_range(2,16) }
+
+declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32)
+declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)