[SVE] Use NEON for extract_vector_elt when the index is in range.

Patch also adds missing patterns for unpacked vector types and
extracts of element zero.

Differential Revision: https://reviews.llvm.org/D87842
This commit is contained in:
Paul Walker 2020-09-17 15:04:31 +01:00
parent 87f0b51d68
commit 6457455248
4 changed files with 379 additions and 116 deletions

View File

@ -483,15 +483,6 @@ let Predicates = [HasSVE] in {
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
// Use more efficient NEON instructions to extract elements within the NEON
// part (first 128bits) of an SVE register.
def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
(f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
(f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
(f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;
// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
@ -2162,6 +2153,28 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
(DUP_ZR_D $index)),
$src)>;
// Extract element from vector with scalar index
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
// Extract element from vector with immediate index
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
(EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
@ -2173,34 +2186,54 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
// Extract element from vector with scalar index
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
ZPR:$vec)>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
ZPR:$vec)>;
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
ZPR:$vec)>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
(LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
ZPR:$vec)>;
// Extract element from vector with immediate index that's within the bottom 128-bits.
let AddedComplexity = 1 in {
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
(i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
(i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
(i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
}
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
ZPR:$vec)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
ZPR:$vec)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
ZPR:$vec)>;
// Extract first element from vector.
let AddedComplexity = 2 in {
def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
(i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
(f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
(f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
(f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
}
}
let Predicates = [HasSVE, HasMatMulInt8] in {

View File

@ -1,69 +1,125 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: llc < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) {
target triple = "aarch64-unknown-linux-gnu"
define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, b0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 16 x i8> %a, i32 0
ret i8 %b
}
define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) {
define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane15_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: umov w0, v0.b[15]
; CHECK-NEXT: ret
%b = extractelement <vscale x 16 x i8> %a, i32 15
ret i8 %b
}
define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane16_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[16]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 16 x i8> %a, i32 16
ret i8 %b
}
define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: test_lane0_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x i16> %a, i32 0
ret i16 %b
}
define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) {
define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: test_lane7_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: umov w0, v0.h[7]
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x i16> %a, i32 7
ret i16 %b
}
define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: test_lane8_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[8]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x i16> %a, i32 8
ret i16 %b
}
define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_lane0_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 0
ret i32 %b
}
define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) {
define i32 @test_lane3_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_lane3_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, v0.s[3]
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 3
ret i32 %b
}
define i32 @test_lane4_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_lane4_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[4]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 4
ret i32 %b
}
define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: test_lane0_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x i64> %a, i32 0
ret i64 %b
}
define double @test_lane0_2xf64(<vscale x 2 x double> %a) {
; CHECK-LABEL: test_lane0_2xf64:
define i64 @test_lane1_2xi64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: test_lane1_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: mov x0, v0.d[1]
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 0
ret double %b
%b = extractelement <vscale x 2 x i64> %a, i32 1
ret i64 %b
}
define float @test_lane0_4xf32(<vscale x 4 x float> %a) {
; CHECK-LABEL: test_lane0_4xf32:
define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: test_lane2_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: mov z0.d, z0.d[2]
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 0
ret float %b
%b = extractelement <vscale x 2 x i64> %a, i32 2
ret i64 %b
}
define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
; CHECK-LABEL: test_lane0_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
@ -72,7 +128,172 @@ define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
ret half %b
}
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
; CHECK-LABEL: test_lane7_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[7]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x half> %a, i32 7
ret half %b
}
define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
; CHECK-LABEL: test_lane8_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[8]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x half> %a, i32 8
ret half %b
}
define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
; CHECK-LABEL: test_lane0_4xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x half> %a, i32 0
ret half %b
}
define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
; CHECK-LABEL: test_lane3_4xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x half> %a, i32 3
ret half %b
}
define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
; CHECK-LABEL: test_lane4_4xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[4]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x half> %a, i32 4
ret half %b
}
define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
; CHECK-LABEL: test_lane0_2xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x half> %a, i32 0
ret half %b
}
define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
; CHECK-LABEL: test_lane1_2xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x half> %a, i32 1
ret half %b
}
define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
; CHECK-LABEL: test_lane2_2xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[2]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x half> %a, i32 2
ret half %b
}
define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
; CHECK-LABEL: test_lane0_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 0
ret float %b
}
define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
; CHECK-LABEL: test_lane3_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 3
ret float %b
}
define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
; CHECK-LABEL: test_lane4_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[4]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 4
ret float %b
}
define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
; CHECK-LABEL: test_lane0_2xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x float> %a, i32 0
ret float %b
}
define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
; CHECK-LABEL: test_lane1_2xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x float> %a, i32 1
ret float %b
}
define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
; CHECK-LABEL: test_lane2_2xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[2]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x float> %a, i32 2
ret float %b
}
define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
; CHECK-LABEL: test_lane0_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 0
ret double %b
}
define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
; CHECK-LABEL: test_lane1_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 1
ret double %b
}
define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
; CHECK-LABEL: test_lane2_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[2]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 2
ret double %b
}
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
@ -84,7 +305,7 @@ define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
ret i8 %b
}
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
@ -96,7 +317,7 @@ define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
ret i16 %b
}
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
@ -108,7 +329,7 @@ define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
ret i32 %b
}
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
@ -120,31 +341,7 @@ define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
ret i64 %b
}
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) {
; CHECK-LABEL: test_lanex_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 %x
ret double %b
}
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) {
; CHECK-LABEL: test_lanex_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 %x
ret float %b
}
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
@ -156,41 +353,77 @@ define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
ret half %b
}
; Deliberately choose an index that is out-of-bounds
define i8 @test_lane64_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane64_16xi8:
define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.b
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
%b = extractelement <vscale x 16 x i8> %a, i32 64
ret i8 %b
%b = extractelement <vscale x 4 x half> %a, i32 %x
ret half %b
}
define double @test_lane9_2xf64(<vscale x 2 x double> %a) {
; CHECK-LABEL: test_lane9_2xf64:
define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #9
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x half> %a, i32 %x
ret half %b
}
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x float> %a, i32 %x
ret float %b
}
define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x float> %a, i32 %x
ret float %b
}
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x double> %a, i32 9
%b = extractelement <vscale x 2 x double> %a, i32 %x
ret double %b
}
; Deliberately choose an index that is undefined
define i32 @test_lane64_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: test_lane64_4xi32:
define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_undef_lane_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 undef
ret i32 %b
}
define i8 @extract_of_insert_undef_16xi8(i8 %a) {
define i8 @extract_of_insert_undef_16xi8(i8 %a) #0 {
; CHECK-LABEL: extract_of_insert_undef_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
@ -199,7 +432,7 @@ define i8 @extract_of_insert_undef_16xi8(i8 %a) {
ret i8 %c
}
define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract0_of_insert0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
@ -208,7 +441,7 @@ define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
ret i8 %d
}
define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract64_of_insert64_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
@ -217,18 +450,17 @@ define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
ret i8 %d
}
define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) {
define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[3]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: umov w0, v0.b[3]
; CHECK-NEXT: ret
%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
%d = extractelement <vscale x 16 x i8> %c, i32 3
ret i8 %d
}
define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane0_zero_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
@ -240,7 +472,7 @@ define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
; The DAG combiner should fold the extract of a splat to give element zero
; of the splat, i.e. %x. If the index is beyond the end of the scalable
; vector the result is undefined anyway.
define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) #0 {
; CHECK-LABEL: test_lanex_splat_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
@ -249,3 +481,5 @@ define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
%c = extractelement <vscale x 2 x i64> %b, i32 %y
ret i64 %c
}
attributes #0 = { "target-features"="+sve" }

View File

@ -182,9 +182,8 @@ define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, b1
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 0
@ -212,14 +211,13 @@ define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, z1.b[1]
; CHECK-NEXT: mov w8, #3
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: mov w9, #3
; CHECK-NEXT: umov w8, v1.b[1]
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b
; CHECK-NEXT: mov z0.b, p0/m, w9
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 1
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3

View File

@ -127,8 +127,7 @@ define i64 @split_extract_8i64_idx(<vscale x 8 x i64> %a, i32 %idx) {
define i16 @promote_extract_4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: promote_extract_4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: mov w0, v0.s[1]
; CHECK-NEXT: ret
%ext = extractelement <vscale x 4 x i16> %a, i32 1
ret i16 %ext
@ -137,8 +136,7 @@ define i16 @promote_extract_4i16(<vscale x 4 x i16> %a) {
define i8 @split_extract_32i8(<vscale x 32 x i8> %a) {
; CHECK-LABEL: split_extract_32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[3]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: umov w0, v0.b[3]
; CHECK-NEXT: ret
%ext = extractelement <vscale x 32 x i8> %a, i32 3
ret i8 %ext