llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extra...

244 lines
14 KiB
LLVM

; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
; All these tests create a vector tuple, insert z5 into one of the elements,
; and finally extracts that element from the wide vector to return it. These
; checks ensure that z5 is always the value that is returned.
;
; Insert into two element tuples
;
; tuple: { tuple2.res0, tuple2.res1 }
; insert z5: { z5 , tuple2.res1 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple2.res0, tuple2.res1 }
; insert z5: { tuple2.res0, z5 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
ret <vscale x 4 x i32> %ext
}
; This test checks the elements _not_ being set aren't changed.
; tuple: { tuple2.res0, tuple2.res1 }
; insert z5: { tuple2.res0, z5 }
; extract z0: ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
; CHECK-NEXT: ret
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
ret <vscale x 4 x i32> %ext
}
; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
ret <vscale x 4 x i32> %ext
}
;
; Insert into three element tuples
;
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5: { z5 , tuple3.res0, tuple3.res2 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5: { tuple3.res0, z5 , tuple3.res2 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5: { tuple3.res0, tuple3.res1, z5 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
ret <vscale x 4 x i32> %ext
}
; This test checks the elements _not_ being set aren't changed.
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5: { tuple3.res0, z5 , tuple3.res2 }
; extract z2: ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
ret <vscale x 4 x i32> %ext
}
; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
; CHECK-NEXT: mov z0.d, z3.d
; CHECK-NEXT: ret
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
ret <vscale x 4 x i32> %ext
}
;
; Insert into four element tuples
;
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
ret <vscale x 4 x i32> %ext
}
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
; extract z5: ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
ret <vscale x 4 x i32> %ext
}
; This test checks the elements _not_ being set aren't changed.
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
; extract z2: ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
ret <vscale x 4 x i32> %ext
}
; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
; CHECK-NEXT: mov z0.d, z3.d
; CHECK-NEXT: ret
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
ret <vscale x 4 x i32> %ext
}
attributes #0 = { nounwind "target-features"="+sve" }
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)