[IR] Change vector.splice intrinsic to reject out-of-bounds indices

I've changed the definition of the experimental.vector.splice
intrinsic to reject indices that are known to be, or could be,
out of bounds. In practice, this means changing the definition so that
the index is now only valid in the range [-VL, VL-1], where VL is the
known minimum vector length. We use the vscale_range attribute to
take the minimum vscale value into account, which lets us permit
more indices when the attribute is present.
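
As an illustration (hypothetical example, mine rather than the patch's):
with vscale_range(2,16) on the caller, the known minimum element count of
<vscale x 4 x i32> rises from 4 to 2 * 4 = 8, so index 7 is accepted where
only [-4, 3] would be valid without the attribute:

; Hypothetical function name; not taken from the patch's tests.
define <vscale x 4 x i32> @splice_ok(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(2,16) {
  %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 7)
  ret <vscale x 4 x i32> %res
}
declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)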

The splice intrinsic is currently only ever generated by the vectoriser,
which never attempts to splice vectors with out-of-bounds indices.
Changing the definition also simplifies codegen, since we can now always
assume that the index is valid.

This patch was created in response to review comments on D115863.

Differential Revision: https://reviews.llvm.org/D115933
David Sherwood 2021-12-17 09:39:21 +00:00
parent 22ac067b2d
commit 51497dc0b2
6 changed files with 89 additions and 307 deletions

@@ -17220,10 +17220,11 @@ For example:
Arguments:
""""""""""
The first two operands are vectors with the same type. The third argument
``imm`` is the start index, modulo VL, where VL is the runtime vector length of
the source/result vector. The ``imm`` is a signed integer constant in the range
``-VL <= imm < VL``. For values outside of this range the result is poison.
The first two operands are vectors with the same type. The start index is imm
modulo the runtime number of elements in the source vector. For a fixed-width
vector <N x eltty>, imm is a signed integer constant in the range
-N <= imm < N. For a scalable vector <vscale x N x eltty>, imm is a signed
integer constant in the range -X <= imm < X where X=vscale_range_min * N.
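
As a worked instance of the new wording (hypothetical example, not part
of the diff): for <vscale x 2 x double>, N = 2, and with vscale_range(2,16)
on the enclosing function X = vscale_range_min * N = 2 * 2 = 4, so any
constant -4 <= imm < 4 is valid; imm = -4 splices in the trailing four
elements of the first operand:

; Hypothetical example of the documented index range.
define <vscale x 2 x double> @splice_last4(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(2,16) {
  %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -4)
  ret <vscale x 2 x double> %res
}
declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)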
'``llvm.experimental.stepvector``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -11245,12 +11245,6 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
  unsigned NumElts = VT.getVectorNumElements();
  if ((-Imm > NumElts) || (Imm >= NumElts)) {
    // Result is undefined if immediate is out-of-bounds.
    setValue(&I, DAG.getUNDEF(VT));
    return;
  }
  uint64_t Idx = (NumElts + Imm) % NumElts;
  // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.

@@ -5352,6 +5352,24 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
    break;
  }
  case Intrinsic::experimental_vector_splice: {
    VectorType *VecTy = cast<VectorType>(Call.getType());
    int64_t Idx = cast<ConstantInt>(Call.getArgOperand(2))->getSExtValue();
    int64_t KnownMinNumElements = VecTy->getElementCount().getKnownMinValue();
    if (Call.getParent() && Call.getParent()->getParent()) {
      AttributeList Attrs = Call.getParent()->getParent()->getAttributes();
      if (Attrs.hasFnAttr(Attribute::VScaleRange))
        KnownMinNumElements *= Attrs.getFnAttrs().getVScaleRangeMin();
    }
    Assert((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) ||
               (Idx >= 0 && Idx < KnownMinNumElements),
           "The splice index exceeds the range [-VL, VL-1] where VL is the "
           "known minimum number of elements in the vector. For scalable "
           "vectors the minimum number of elements is determined from "
           "vscale_range.",
           &Call);
    break;
  }
  case Intrinsic::experimental_stepvector: {
    VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
    Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
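
To see what the new check rejects, here is a minimal sketch (hypothetical
module, mirroring the new verifier test at the end of this patch): for
<4 x i32> the known minimum is 4 elements, so index 4 falls outside
[-4, 3] and opt -verify now reports the error above instead of the call
silently lowering to undef:

; Hypothetical example rejected by the new check.
define <4 x i32> @splice_oob(<4 x i32> %a, <4 x i32> %b) {
  %res = call <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32> %a, <4 x i32> %b, i32 4)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32>, <4 x i32>, i32)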

@@ -62,15 +62,6 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 {
ret <16 x float> %res
}
; Verify out-of-bounds index results in undef vector.
define <2 x double> @splice_v2f64_idx_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: splice_v2f64_idx_out_of_bounds:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2)
ret <2 x double> %res
}
;
; VECTOR_SPLICE (trailing elements)
;
@@ -130,15 +121,6 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 {
ret <16 x float> %res
}
; Verify out-of-bounds trailing element count results in undef vector.
define <2 x double> @splice_v2f64_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: splice_v2f64_out_of_bounds:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3)
ret <2 x double> %res
}
declare <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8>, <2 x i8>, i32)
declare <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32>, <8 x i32>, i32)

@@ -24,7 +24,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vsca
ret <vscale x 16 x i8> %res
}
define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #1 {
; CHECK-LABEL: splice_nxv16i8_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #255
@@ -33,29 +33,6 @@ define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscal
ret <vscale x 16 x i8> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w9, #256
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: cmp x8, #256
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 256)
ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_first_idx:
; CHECK: // %bb.0:
@@ -65,38 +42,6 @@ define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vsca
ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @splice_nxv8i16_last_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #254
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 127)
ret <vscale x 8 x i16> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 8 x i16> @splice_nxv8i16_clamped_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 128)
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_first_idx:
; CHECK: // %bb.0:
@@ -106,7 +51,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vsca
ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #1 {
; CHECK-LABEL: splice_nxv4i32_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
@@ -115,29 +60,6 @@ define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscal
ret <vscale x 4 x i32> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 4 x i32> @splice_nxv4i32_clamped_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 64)
ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_first_idx:
; CHECK: // %bb.0:
@@ -147,7 +69,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vsca
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #1 {
; CHECK-LABEL: splice_nxv2i64_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
@@ -156,29 +78,6 @@ define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscal
ret <vscale x 2 x i64> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 2 x i64> @splice_nxv2i64_clamped_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 32)
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg_idx:
; CHECK: // %bb.0:
@@ -219,7 +118,7 @@ define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vs
ret <vscale x 2 x half> %res
}
define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #1 {
; CHECK-LABEL: splice_nxv2f16_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
@@ -228,31 +127,6 @@ define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vsc
ret <vscale x 2 x half> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 2 x half> @splice_nxv2f16_clamped_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 32)
ret <vscale x 2 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg_idx:
; CHECK: // %bb.0:
@@ -293,7 +167,7 @@ define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vs
ret <vscale x 4 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #1 {
; CHECK-LABEL: splice_nxv4f16_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
@@ -302,31 +176,6 @@ define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vsc
ret <vscale x 4 x half> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 4 x half> @splice_nxv4f16_clamped_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 64)
ret <vscale x 4 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_first_idx:
; CHECK: // %bb.0:
@@ -336,7 +185,7 @@ define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vs
ret <vscale x 8 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #1 {
; CHECK-LABEL: splice_nxv8f16_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #254
@@ -345,29 +194,6 @@ define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vsc
ret <vscale x 8 x half> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 8 x half> @splice_nxv8f16_clamped_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 128)
ret <vscale x 8 x half> %res
}
define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg_idx:
; CHECK: // %bb.0:
@@ -408,7 +234,7 @@ define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <
ret <vscale x 2 x float> %res
}
define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #1 {
; CHECK-LABEL: splice_nxv2f32_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
@@ -417,31 +243,6 @@ define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <v
ret <vscale x 2 x float> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 2 x float> @splice_nxv2f32_clamped_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 32)
ret <vscale x 2 x float> %res
}
define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_first_idx:
; CHECK: // %bb.0:
@@ -451,7 +252,7 @@ define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <
ret <vscale x 4 x float> %res
}
define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #1 {
; CHECK-LABEL: splice_nxv4f32_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
@@ -460,29 +261,6 @@ define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <v
ret <vscale x 4 x float> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 4 x float> @splice_nxv4f32_clamped_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 64)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_first_idx:
; CHECK: // %bb.0:
@@ -492,7 +270,7 @@ define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a,
ret <vscale x 2 x double> %res
}
define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splice_nxv2f64_last_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
@@ -501,29 +279,6 @@ define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a,
ret <vscale x 2 x double> %res
}
; Ensure index is clamped when we cannot prove it's less than 2048-bit.
define <vscale x 2 x double> @splice_nxv2f64_clamped_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_clamped_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 32)
ret <vscale x 2 x double> %res
}
; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1_idx:
@@ -617,8 +372,8 @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8
}
; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #0 {
; CHECK-LABEL: splice_nxv16f32_clamped_idx:
define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
; CHECK-LABEL: splice_nxv16f32_16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8
@@ -684,9 +439,8 @@ define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16
ret <vscale x 16 x i8> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 16 x i8> @splice_nxv16i8_clamped(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_clamped:
define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
; CHECK-LABEL: splice_nxv16i8_neg17:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -739,9 +493,8 @@ define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x
ret <vscale x 8 x i16> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 8 x i16> @splice_nxv8i16_clamped(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_clamped:
define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #2 {
; CHECK-LABEL: splice_nxv8i16_neg9:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -794,9 +547,8 @@ define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x
ret <vscale x 4 x i32> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 4 x i32> @splice_nxv4i32_clamped(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_clamped:
define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #2 {
; CHECK-LABEL: splice_nxv4i32_neg5:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -849,9 +601,8 @@ define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x
ret <vscale x 2 x i64> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 2 x i64> @splice_nxv2i64_clamped(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_clamped:
define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #2 {
; CHECK-LABEL: splice_nxv2i64_neg3:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -904,9 +655,8 @@ define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8
ret <vscale x 8 x half> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 8 x half> @splice_nxv8f16_clamped(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_clamped:
define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #2 {
; CHECK-LABEL: splice_nxv8f16_neg9:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -959,9 +709,8 @@ define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x
ret <vscale x 4 x float> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 4 x float> @splice_nxv4f32_clamped(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_clamped:
define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #2 {
; CHECK-LABEL: splice_nxv4f32_neg5:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -1014,9 +763,8 @@ define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale
ret <vscale x 2 x double> %res
}
; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
define <vscale x 2 x double> @splice_nxv2f64_clamped(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_clamped:
define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #2 {
; CHECK-LABEL: splice_nxv2f64_neg3:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
@@ -1147,8 +895,8 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
}
; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @splice_nxv16f32_clamped(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #0 {
; CHECK-LABEL: splice_nxv16f32_clamped:
define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
; CHECK-LABEL: splice_nxv16f32_neg17:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8
@@ -1198,3 +946,5 @@ declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale
declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }
attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }

@@ -0,0 +1,37 @@
; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
define <2 x double> @splice_v2f64_idx_neg3(<2 x double> %a, <2 x double> %b) #0 {
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3)
ret <2 x double> %res
}
; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
define <vscale x 2 x double> @splice_nxv2f64_idx_neg3_vscale_min1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
ret <vscale x 2 x double> %res
}
; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
define <vscale x 2 x double> @splice_nxv2f64_idx_neg5_vscale_min2(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -5)
ret <vscale x 2 x double> %res
}
; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
define <2 x double> @splice_v2f64_idx2(<2 x double> %a, <2 x double> %b) #0 {
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2)
ret <2 x double> %res
}
; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
define <2 x double> @splice_v2f64_idx3(<2 x double> %a, <2 x double> %b) #1 {
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 4)
ret <2 x double> %res
}
attributes #0 = { vscale_range(1,16) }
attributes #1 = { vscale_range(2,16) }
declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32)
declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)