[AArch64] Regenerate some test checks. NFC

This patch just reruns the update_llc_test_checks script on the AArch64
tests that claim to have been updated by the script, cleaning up the output.
David Green 2021-09-08 11:08:32 +01:00
parent c01b76e733
commit caabf2a445
16 changed files with 321 additions and 351 deletions
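
For reference, a typical regeneration run looks roughly like this (the test
path and build directory are illustrative; --llc-binary points the script at
a locally built llc):

  llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      llvm/test/CodeGen/AArch64/sve-select.ll

The script reruns each RUN line and rewrites the CHECK lines in place, which
is why the differences below are purely mechanical: restored comment suffixes
such as // =0x7f, expanded movi immediates, and hardcoded OUTLINED_FUNCTION
names in place of FileCheck variables.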

@@ -9,6 +9,16 @@ target triple = "aarch64-unknown-linux-gnu"
; here, only that this case no longer causes said crash.
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-LABEL: dupext_crashtest:
; CHECK: // %bb.0: // %for.body.lr.ph
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: dup v0.2s, w8
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d1, [x8]
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: b .LBB0_1
for.body.lr.ph:
%conv314 = zext i32 %e to i64
br label %vector.memcheck

@@ -11,7 +11,7 @@ define half @Const0() {
;
; CHECK-ZCZ-LABEL: Const0:
; CHECK-ZCZ: // %bb.0: // %entry
; CHECK-ZCZ-NEXT: movi d0, #0
; CHECK-ZCZ-NEXT: movi d0, #0000000000000000
; CHECK-ZCZ-NEXT: ret
;
; CHECK-NOFP16-LABEL: Const0:

@@ -10,8 +10,7 @@ declare half @llvm.fma.f16(half, half, half) #1
define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -23,8 +22,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -36,8 +34,7 @@ entry:
define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -48,8 +45,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -60,8 +56,7 @@ entry:
define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -74,8 +69,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -88,8 +82,7 @@ entry:
define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla h0, h1, v2.h[0]
; CHECK-NEXT: ret
@@ -101,8 +94,7 @@ entry:
define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla h0, h1, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -113,8 +105,7 @@ entry:
define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -127,8 +118,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -141,8 +131,7 @@ entry:
define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -154,8 +143,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -167,8 +155,7 @@ entry:
define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -182,8 +169,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -197,8 +183,7 @@ entry:
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls h0, h1, v2.h[0]
; CHECK-NEXT: ret
@@ -211,8 +196,7 @@ entry:
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls h0, h1, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -224,8 +208,7 @@ entry:
define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -236,8 +219,7 @@ entry:
define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -248,8 +230,7 @@ entry:
define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
@@ -261,8 +242,7 @@ entry:
define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -273,8 +253,7 @@ entry:
define dso_local half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx h0, h0, h1
; CHECK-NEXT: ret
entry:
@@ -284,8 +263,7 @@ entry:
define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx h0, h0, v1.h[3]
; CHECK-NEXT: ret
@@ -297,8 +275,7 @@ entry:
define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
@@ -310,8 +287,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
@@ -323,8 +299,7 @@ entry:
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -335,8 +310,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -347,8 +321,7 @@ entry:
define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx h0, h0, v1.h[7]
; CHECK-NEXT: ret
entry:
@@ -359,8 +332,7 @@ entry:
define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT: dup v1.4h, v1.h[0]
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h
@@ -374,8 +346,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT: dup v1.8h, v1.h[0]
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h
@@ -389,8 +360,7 @@ entry:
define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla h0, h1, v2.h[3]
; CHECK-NEXT: ret
@@ -402,8 +372,7 @@ entry:
define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla h0, h1, v2.h[7]
; CHECK-NEXT: ret
entry:
@@ -414,8 +383,7 @@ entry:
define dso_local half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls h0, h1, v2.h[3]
; CHECK-NEXT: ret
@@ -428,8 +396,7 @@ entry:
define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls h0, h1, v2.h[7]
; CHECK-NEXT: ret
entry:
@@ -441,8 +408,7 @@ entry:
define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fmla h0, h1, v2.h[3]
; CHECK-NEXT: ret

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64eb-unknown | FileCheck %s
; RUN: llc < %s -mtriple=aarch64_be-unknown | FileCheck %s
; i8* p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
@@ -207,7 +207,6 @@ define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-NEXT: ldur w8, [x0, #1]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 4
@@ -238,7 +237,6 @@ define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-NEXT: ldur w8, [x0, #-4]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
%tmp2 = load i8, i8* %tmp1, align 4
@@ -268,7 +266,6 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldur w0, [x0, #1]
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
%tmp2 = load i8, i8* %tmp1, align 1
@@ -298,7 +295,6 @@ define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldur w0, [x0, #-4]
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -449,7 +445,6 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -472,7 +467,6 @@ define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -496,7 +490,6 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -516,7 +509,6 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w0, [x0]
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -539,7 +531,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -563,7 +554,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -590,7 +580,6 @@ define i16 @load_i16_from_nonzero_offset(i8* %p) {
; CHECK-NEXT: ldrb w0, [x0, #2]
; CHECK-NEXT: bfi w0, w8, #8, #24
; CHECK-NEXT: ret
%p1.i16 = bitcast i8* %p to i16*
%p2.i8 = getelementptr i8, i8* %p, i64 2
%v1 = load i16, i16* %p1.i16

@@ -12,7 +12,7 @@ define i32 @a() {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl [[OUTLINED_DIRECT:OUTLINED_FUNCTION_[0-9]+]]
; CHECK-NEXT: bl OUTLINED_FUNCTION_1
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -28,7 +28,7 @@ define i32 @b() {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl [[OUTLINED_DIRECT]]
; CHECK-NEXT: bl OUTLINED_FUNCTION_1
; CHECK-NEXT: add w0, w0, #88
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -44,7 +44,7 @@ define hidden i32 @c(i32 (i32, i32, i32, i32)* %fptr) {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl [[OUTLINED_INDIRECT:OUTLINED_FUNCTION_[0-9]+]]
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -60,7 +60,7 @@ define hidden i32 @d(i32 (i32, i32, i32, i32)* %fptr) {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl [[OUTLINED_INDIRECT]]
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: add w0, w0, #88
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -70,8 +70,8 @@ entry:
ret i32 %add
}
; CHECK: [[OUTLINED_INDIRECT]]:
; CHECK-SAME: // @[[OUTLINED_INDIRECT]] Thunk
; CHECK: OUTLINED_FUNCTION_0:
; CHECK-SAME: // @OUTLINED_FUNCTION_0 Thunk
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: mov w0, #1
@@ -80,8 +80,8 @@ entry:
; CHECK-NEXT: mov w3, #4
; CHECK-NEXT: br x8
; CHECK: [[OUTLINED_DIRECT]]:
; CHECK-SAME: // @[[OUTLINED_DIRECT]] Thunk
; CHECK: OUTLINED_FUNCTION_1:
; CHECK-SAME: // @OUTLINED_FUNCTION_1 Thunk
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #1
; CHECK-NEXT: mov w1, #2

@@ -50,22 +50,24 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv32i1:
; CHECK-SELDAG-LABEL: reverse_nxv32i1:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev p2.b, p1.b
; CHECK-SELDAG-NEXT: rev p1.b, p0.b
; CHECK-SELDAG-NEXT: mov p0.b, p2.b
; CHECK-SELDAG-NEXT: ret
;
; CHECK-FASTISEL-LABEL: reverse_nxv32i1:
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl]
; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl]
; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
@@ -158,22 +160,24 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
; CHECK-LABEL: reverse_nxv8i32:
; CHECK-SELDAG-LABEL: reverse_nxv8i32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
; CHECK-SELDAG-NEXT: rev z1.s, z0.s
; CHECK-SELDAG-NEXT: mov z0.d, z2.d
; CHECK-SELDAG-NEXT: ret
;
; CHECK-FASTISEL-LABEL: reverse_nxv8i32:
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str z1, [sp]
; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp]
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str z1, [sp] // 16-byte Folded Spill
; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
@@ -182,7 +186,7 @@ define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
; CHECK-LABEL: reverse_nxv16f32:
; CHECK-SELDAG-LABEL: reverse_nxv16f32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z5.s, z3.s
; CHECK-SELDAG-NEXT: rev z4.s, z2.s
@@ -191,21 +195,23 @@ define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
; CHECK-SELDAG-NEXT: mov z0.d, z5.d
; CHECK-SELDAG-NEXT: mov z1.d, z4.d
; CHECK-SELDAG-NEXT: ret
;
; CHECK-FASTISEL-LABEL: reverse_nxv16f32:
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl]
; CHECK-FASTISEL-NEXT: str z2, [sp]
; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
; CHECK-FASTISEL-NEXT: ldr z1, [sp]
; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl]
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-FASTISEL-NEXT: str z2, [sp] // 16-byte Folded Spill
; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
; CHECK-FASTISEL-NEXT: ldr z1, [sp] // 16-byte Folded Reload
; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)

@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- -debug 2>&1 | FileCheck %s
; REQUIRES: asserts

@@ -18,8 +18,8 @@ define i32 @test1(i32 %x, i64 %y) {
define i64 @test2(i32 %x, i64 %y) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg w[[REG:[0-9]+]], w0
; CHECK-NEXT: asr x0, x1, x[[REG]]
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: asr x0, x1, x8
; CHECK-NEXT: ret
%sub9 = sub nsw i32 64, %x
%sh_prom12.i = zext i32 %sub9 to i64

@@ -5,6 +5,7 @@
; PR20558
; Load the stack guard for the second time, just in case the previous value gets spilled.
define i32 @test_stack_guard_remat2() ssp {
; CHECK-LABEL: test_stack_guard_remat2:
; CHECK: ; %bb.0: ; %entry
@@ -17,7 +18,6 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: Lloh1:
; Load the stack guard for the second time, just in case the previous value gets spilled.
; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]

@@ -1,39 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck --check-prefixes=CHECK,NOSPLIT %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m3 | FileCheck --check-prefixes=CHECK,NOSPLIT %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m3 | FileCheck %s
define void @test_split_f(<4 x float> %val, <4 x float>* %addr) {
; NOSPLIT-LABEL: test_split_f:
; NOSPLIT: // %bb.0:
; NOSPLIT-NEXT: str q0, [x0]
; NOSPLIT-NEXT: ret
;
; SPLIT-LABEL: test_split_f:
; SPLIT: // %bb.0:
; SPLIT-NEXT: rev64 v0.4s, v0.4s
; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; SPLIT-NEXT: st1 { v0.2s }, [x0]
; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; SPLIT-NEXT: add x8, x0, #8 // =8
; SPLIT-NEXT: st1 { v0.2s }, [x8]
; SPLIT-NEXT: ret
; CHECK-LABEL: test_split_f:
; CHECK: // %bb.0:
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
store <4 x float> %val, <4 x float>* %addr, align 8
ret void
}
define void @test_split_d(<2 x double> %val, <2 x double>* %addr) {
; NOSPLIT-LABEL: test_split_d:
; NOSPLIT: // %bb.0:
; NOSPLIT-NEXT: str q0, [x0]
; NOSPLIT-NEXT: ret
;
; SPLIT-LABEL: test_split_d:
; SPLIT: // %bb.0:
; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; SPLIT-NEXT: st1 { v0.2d }, [x0]
; SPLIT-NEXT: ret
; CHECK-LABEL: test_split_d:
; CHECK: // %bb.0:
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
store <2 x double> %val, <2 x double>* %addr, align 8
ret void
}

@@ -6,7 +6,7 @@
define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.b, z0.b, #127
; CHECK-NEXT: add z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
@@ -20,7 +20,7 @@ define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.h, z0.h, #127
; CHECK-NEXT: add z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
@@ -50,7 +50,7 @@ define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.s, z0.s, #127
; CHECK-NEXT: add z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
@@ -80,7 +80,7 @@ define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.d, z0.d, #127
; CHECK-NEXT: add z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
@@ -112,7 +112,7 @@ define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sub_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.b, z0.b, #127
; CHECK-NEXT: sub z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
@@ -126,7 +126,7 @@ define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.h, z0.h, #127
; CHECK-NEXT: sub z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
@@ -156,7 +156,7 @@ define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.s, z0.s, #127
; CHECK-NEXT: sub z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
@@ -186,7 +186,7 @@ define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.d, z0.d, #127
; CHECK-NEXT: sub z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
@@ -216,8 +216,9 @@ define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK: sub z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -230,8 +231,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK: sub z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -246,10 +248,11 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: sub z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -372,8 +375,9 @@ define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; As smax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_b:
; CHECK: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -386,8 +390,9 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As smax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_h:
; CHECK: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -402,10 +407,11 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: smax z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -528,8 +534,9 @@ define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; As smin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_b:
; CHECK: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -542,8 +549,9 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As smin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_h:
; CHECK: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -558,10 +566,11 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: smin z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -684,8 +693,9 @@ define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; As umax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_b:
; CHECK: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -698,8 +708,9 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As umax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_h:
; CHECK: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -714,10 +725,11 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: umax z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -840,8 +852,9 @@ define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; As umin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_b:
; CHECK: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -854,8 +867,9 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As umin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_h:
; CHECK: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -870,10 +884,11 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: umin z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1769,8 +1784,9 @@ define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2
; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -1783,8 +1799,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1799,9 +1816,10 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: lsr z0.s, [[PG]]/m, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1819,8 +1837,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; As mul_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_b:
; CHECK: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -1833,8 +1852,9 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As mul_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_h:
; CHECK: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1849,10 +1869,11 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: mul z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)

@@ -135,41 +135,37 @@ define <vscale x 2 x i1> @select_nxv2i1(i1 %cond, <vscale x 2 x i1> %a, <vscal
; Integer vector select
define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p,
<vscale x 16 x i8> %dst,
<vscale x 16 x i8> %a) {
define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %dst, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sel_nxv16i8:
; CHECK: mov z0.b, p0/m, z1.b
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dst
ret <vscale x 16 x i8> %sel
}
define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p,
<vscale x 8 x i16> %dst,
<vscale x 8 x i16> %a) {
define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p, <vscale x 8 x i16> %dst, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sel_nxv8i16:
; CHECK: mov z0.h, p0/m, z1.h
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dst
ret <vscale x 8 x i16> %sel
}
define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p,
<vscale x 4 x i32> %dst,
<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %dst, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sel_nxv4i32:
; CHECK: mov z0.s, p0/m, z1.s
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst
ret <vscale x 4 x i32> %sel
}
define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p,
<vscale x 2 x i64> %dst,
<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sel_nxv2i64:
; CHECK: mov z0.d, p0/m, z1.d
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dst
ret <vscale x 2 x i64> %sel
@@ -177,41 +173,37 @@ define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p,
; Floating point vector select
define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p,
<vscale x 8 x half> %dst,
<vscale x 8 x half> %a) {
define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %dst, <vscale x 8 x half> %a) {
; CHECK-LABEL: sel_nxv8f16:
; CHECK: mov z0.h, p0/m, z1.h
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %dst
ret <vscale x 8 x half> %sel
}
define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p,
<vscale x 4 x float> %dst,
<vscale x 4 x float> %a) {
define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %dst, <vscale x 4 x float> %a) {
; CHECK-LABEL: sel_nxv4f32:
; CHECK: mov z0.s, p0/m, z1.s
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %dst
ret <vscale x 4 x float> %sel
}
define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p,
<vscale x 2 x float> %dst,
<vscale x 2 x float> %a) {
define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p, <vscale x 2 x float> %dst, <vscale x 2 x float> %a) {
; CHECK-LABEL: sel_nxv2f32:
; CHECK: mov z0.d, p0/m, z1.d
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> %a, <vscale x 2 x float> %dst
ret <vscale x 2 x float> %sel
}
define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p,
<vscale x 2 x double> %dst,
<vscale x 2 x double> %a) {
define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %dst, <vscale x 2 x double> %a) {
; CHECK-LABEL: sel_nxv8f64:
; CHECK: mov z0.d, p0/m, z1.d
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %dst
ret <vscale x 2 x double> %sel
@@ -220,13 +212,13 @@ define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p,
; Check icmp+select
define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f16
; CHECK-LABEL: icmp_select_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x half> %a, <vscale x 2 x half> %b
@@ -234,13 +226,13 @@ define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale
}
define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f32
; CHECK-LABEL: icmp_select_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x float> %a, <vscale x 2 x float> %b
@@ -248,13 +240,13 @@ define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscal
}
define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f64
; CHECK-LABEL: icmp_select_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %b
@@ -262,13 +254,13 @@ define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vsc
}
define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f16
; CHECK-LABEL: icmp_select_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x half> %a, <vscale x 4 x half> %b
@@ -276,13 +268,13 @@ define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale
}
define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f32
; CHECK-LABEL: icmp_select_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x float> %a, <vscale x 4 x float> %b
@@ -290,13 +282,13 @@ define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscal
}
define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8f16
; CHECK-LABEL: icmp_select_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x half> %a, <vscale x 8 x half> %b
@@ -304,13 +296,13 @@ define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale
}
define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i64
; CHECK-LABEL: icmp_select_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
@@ -318,13 +310,13 @@ define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x
}
define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i32
; CHECK-LABEL: icmp_select_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
@@ -332,13 +324,13 @@ define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x
}
define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i16
; CHECK-LABEL: icmp_select_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
@@ -346,13 +338,13 @@ define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x
}
define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i8
; CHECK-LABEL: icmp_select_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
@@ -360,52 +352,52 @@ define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x
}
define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i1
; CHECK-LABEL: icmp_select_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.d, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.d, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
ret <vscale x 2 x i1> %sel
}
define <vscale x 4 x i1> @icmp_select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i1
; CHECK-LABEL: icmp_select_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.s, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.s, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
ret <vscale x 4 x i1> %sel
}
define <vscale x 8 x i1> @icmp_select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i1
; CHECK-LABEL: icmp_select_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.h, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
ret <vscale x 8 x i1> %sel
}
define <vscale x 16 x i1> @icmp_select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i1
; CHECK-LABEL: icmp_select_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: whilelo p2.b, xzr, x8
; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b

View File

@ -233,7 +233,7 @@ define <vscale x 2 x i64> @abs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 2 x i64> @abs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: abs_i64_not_active:
; CHECK: // %bb.0:
; CHECK: abs z0.d, p0/m, z1.d
; CHECK-NEXT: abs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@ -424,7 +424,7 @@ define <vscale x 2 x i64> @cls_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 2 x i64> @cls_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cls_i64_not_active:
; CHECK: // %bb.0:
; CHECK: cls z0.d, p0/m, z1.d
; CHECK-NEXT: cls z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@ -598,7 +598,7 @@ define <vscale x 2 x double> @fabs_f64_active(<vscale x 2 x double> %a, <vscale
define <vscale x 2 x double> @fabs_f64_not_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: fabs_f64_not_active:
; CHECK: // %bb.0:
; CHECK: fabs z0.d, p0/m, z1.d
; CHECK-NEXT: fabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %ret
@ -772,7 +772,7 @@ define <vscale x 2 x i64> @sxtb_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxtb_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxtb_i64_not_active:
; CHECK: // %bb.0:
; CHECK: sxtb z0.d, p0/m, z1.d
; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@ -888,7 +888,7 @@ define <vscale x 2 x i64> @sxth_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxth_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxth_i64_not_active:
; CHECK: // %bb.0:
; CHECK: sxth z0.d, p0/m, z1.d
; CHECK-NEXT: sxth z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@ -946,7 +946,7 @@ define <vscale x 2 x i64> @sxtw_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxtw_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxtw_i64_not_active:
; CHECK: // %bb.0:
; CHECK: sxtw z0.d, p0/m, z1.d
; CHECK-NEXT: sxtw z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret

View File

@ -404,8 +404,9 @@ define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK: uqsub z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@ -418,8 +419,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK: uqsub z0.s, z0.s, #1
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@ -434,10 +436,11 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: uqsub z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
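Aside: a rough sketch of why the d-based predicate, unlike the b- and h-based ones in the two hunks above, blocks the immediate form. The lane picture is illustrative and assumes the .s view of p0.
;   ptrue p0.b / p0.h (all) -> .s lanes: 1 1 1 1 ...  every lane active
;   ptrue p0.d        (all) -> .s lanes: 1 0 1 0 ...  odd lanes inactive
;
; The merging intrinsic must return the corresponding lane of %a in each
; inactive lane, so with a d-based predicate the unpredicated form
; "uqsub z0.s, z0.s, #1", which rewrites every lane, is not a valid
; lowering; the predicated register form above is required instead.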

View File

@ -192,7 +192,7 @@ define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sqabs_i64_not_active:
; CHECK: // %bb.0:
; CHECK: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret

View File

@ -365,7 +365,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; GISEL-NEXT: bl may_throw_neon
; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: .Ltmp4:
; GISEL-NEXT: b .LBB1_1
; GISEL-NEXT: .LBB1_1: // %.Lcontinue
; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload
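For reference, check lines like the above are produced mechanically; a typical regeneration run is a single invocation of the update script (the build and test paths below are illustrative, not taken from this commit):

$ llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      llvm/test/CodeGen/AArch64/<test>.ll

The script rewrites everything between each CHECK-LABEL and the IR body with llc's current output, which is why hand-written patterns such as the CHECK-DAG group above collapse into exact CHECK-NEXT sequences.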