forked from OSchip/llvm-project
[SVE] Use NEON for extract_vector_elt when the index is in range.
This patch also adds missing patterns for unpacked vector types and for extracts of element zero. Differential Revision: https://reviews.llvm.org/D87842
This commit is contained in:
parent
87f0b51d68
commit
6457455248
|
@ -483,15 +483,6 @@ let Predicates = [HasSVE] in {
|
|||
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
|
||||
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
|
||||
|
||||
// Use more efficient NEON instructions to extract elements within the NEON
|
||||
// part (first 128 bits) of an SVE register.
|
||||
def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
|
||||
(f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
|
||||
def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
|
||||
(f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
|
||||
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
|
||||
(f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;
|
||||
|
||||
// Splat immediate (unpredicated)
|
||||
defm DUP_ZI : sve_int_dup_imm<"dup">;
|
||||
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
|
||||
|
@ -2162,6 +2153,28 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
(DUP_ZR_D $index)),
|
||||
$src)>;
|
||||
|
||||
// Extract element from vector with scalar index
|
||||
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
|
||||
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
|
||||
|
||||
// Extract element from vector with immediate index
|
||||
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
|
||||
|
@ -2173,34 +2186,54 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
|
||||
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
|
||||
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
|
||||
def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
|
||||
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
|
||||
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
|
||||
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
|
||||
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
|
||||
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
|
||||
|
||||
// Extract element from vector with scalar index
|
||||
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
// Extract element from vector with immediate index that's within the bottom 128 bits.
|
||||
let AddedComplexity = 1 in {
|
||||
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
|
||||
(i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
|
||||
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
|
||||
(i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
|
||||
def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
|
||||
(i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
|
||||
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
|
||||
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
|
||||
}
|
||||
|
||||
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
|
||||
(LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
|
||||
ZPR:$vec)>;
|
||||
// Extract first element from vector.
|
||||
let AddedComplexity = 2 in {
|
||||
def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
|
||||
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
|
||||
def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
|
||||
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
|
||||
def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
|
||||
(i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
|
||||
def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
|
||||
(i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
|
||||
def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
|
||||
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
|
||||
def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
|
||||
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
|
||||
def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
|
||||
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
|
||||
def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
|
||||
(f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
|
||||
def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
|
||||
(f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
|
||||
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
|
||||
(f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasSVE, HasMatMulInt8] in {
|
||||
|
|
|
@ -1,69 +1,125 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
|
||||
; RUN: llc < %s 2>%t | FileCheck %s
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) {
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.b, b0
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 16 x i8> %a, i32 0
|
||||
ret i8 %b
|
||||
}
|
||||
|
||||
define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) {
|
||||
define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: test_lane15_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umov w0, v0.b[15]
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 16 x i8> %a, i32 15
|
||||
ret i8 %b
|
||||
}
|
||||
|
||||
define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: test_lane16_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.b, z0.b[16]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 16 x i8> %a, i32 16
|
||||
ret i8 %b
|
||||
}
|
||||
|
||||
define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_8xi16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.h, h0
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 8 x i16> %a, i32 0
|
||||
ret i16 %b
|
||||
}
|
||||
|
||||
define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) {
|
||||
define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: test_lane7_8xi16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umov w0, v0.h[7]
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 8 x i16> %a, i32 7
|
||||
ret i16 %b
|
||||
}
|
||||
|
||||
define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: test_lane8_8xi16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.h, z0.h[8]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 8 x i16> %a, i32 8
|
||||
ret i16 %b
|
||||
}
|
||||
|
||||
define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_4xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, s0
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x i32> %a, i32 0
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) {
|
||||
define i32 @test_lane3_4xi32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: test_lane3_4xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w0, v0.s[3]
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x i32> %a, i32 3
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @test_lane4_4xi32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: test_lane4_4xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[4]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x i32> %a, i32 4
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_2xi64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, d0
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x i64> %a, i32 0
|
||||
ret i64 %b
|
||||
}
|
||||
|
||||
define double @test_lane0_2xf64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: test_lane0_2xf64:
|
||||
define i64 @test_lane1_2xi64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: test_lane1_2xi64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: mov x0, v0.d[1]
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 0
|
||||
ret double %b
|
||||
%b = extractelement <vscale x 2 x i64> %a, i32 1
|
||||
ret i64 %b
|
||||
}
|
||||
|
||||
define float @test_lane0_4xf32(<vscale x 4 x float> %a) {
|
||||
; CHECK-LABEL: test_lane0_4xf32:
|
||||
define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: test_lane2_2xi64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: mov z0.d, z0.d[2]
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 0
|
||||
ret float %b
|
||||
%b = extractelement <vscale x 2 x i64> %a, i32 2
|
||||
ret i64 %b
|
||||
}
|
||||
|
||||
define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
|
||||
define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_8xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
|
@ -72,7 +128,172 @@ define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
|
|||
ret half %b
|
||||
}
|
||||
|
||||
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
|
||||
define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane7_8xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.h, z0.h[7]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 8 x half> %a, i32 7
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane8_8xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.h, z0.h[8]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 8 x half> %a, i32 8
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_4xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x half> %a, i32 0
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane3_4xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[3]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x half> %a, i32 3
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane4_4xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[4]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x half> %a, i32 4
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_2xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x half> %a, i32 0
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane1_2xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[1]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x half> %a, i32 1
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
|
||||
; CHECK-LABEL: test_lane2_2xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[2]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x half> %a, i32 2
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_4xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 0
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane3_4xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[3]
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 3
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane4_4xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[4]
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 4
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_2xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x float> %a, i32 0
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane1_2xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[1]
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x float> %a, i32 1
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
|
||||
; CHECK-LABEL: test_lane2_2xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[2]
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x float> %a, i32 2
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_2xf64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 0
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: test_lane1_2xf64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[1]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 1
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: test_lane2_2xf64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, z0.d[2]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 2
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
|
@ -84,7 +305,7 @@ define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
|
|||
ret i8 %b
|
||||
}
|
||||
|
||||
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
|
||||
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_8xi16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
|
@ -96,7 +317,7 @@ define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
|
|||
ret i16 %b
|
||||
}
|
||||
|
||||
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
|
||||
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_4xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
|
@ -108,7 +329,7 @@ define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
|
|||
ret i32 %b
|
||||
}
|
||||
|
||||
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
|
||||
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_2xi64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
|
@ -120,31 +341,7 @@ define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
|
|||
ret i64 %b
|
||||
}
|
||||
|
||||
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) {
|
||||
; CHECK-LABEL: test_lanex_2xf64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||
; CHECK-NEXT: lastb d0, p0, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 %x
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) {
|
||||
; CHECK-LABEL: test_lanex_4xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.s, xzr, x8
|
||||
; CHECK-NEXT: lastb s0, p0, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 %x
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
|
||||
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_8xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
|
@ -156,41 +353,77 @@ define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
|
|||
ret half %b
|
||||
}
|
||||
|
||||
; Deliberately choose an index that is out-of-bounds
|
||||
define i8 @test_lane64_16xi8(<vscale x 16 x i8> %a) {
|
||||
; CHECK-LABEL: test_lane64_16xi8:
|
||||
define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_4xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #64
|
||||
; CHECK-NEXT: whilels p0.b, xzr, x8
|
||||
; CHECK-NEXT: lastb w0, p0, z0.b
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.s, xzr, x8
|
||||
; CHECK-NEXT: lastb h0, p0, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 16 x i8> %a, i32 64
|
||||
ret i8 %b
|
||||
%b = extractelement <vscale x 4 x half> %a, i32 %x
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define double @test_lane9_2xf64(<vscale x 2 x double> %a) {
|
||||
; CHECK-LABEL: test_lane9_2xf64:
|
||||
define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_2xf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #9
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||
; CHECK-NEXT: lastb h0, p0, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x half> %a, i32 %x
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_4xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.s, xzr, x8
|
||||
; CHECK-NEXT: lastb s0, p0, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x float> %a, i32 %x
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_2xf32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||
; CHECK-NEXT: lastb s0, p0, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x float> %a, i32 %x
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
|
||||
; CHECK-LABEL: test_lanex_2xf64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x8, w0
|
||||
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||
; CHECK-NEXT: lastb d0, p0, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 9
|
||||
%b = extractelement <vscale x 2 x double> %a, i32 %x
|
||||
ret double %b
|
||||
}
|
||||
|
||||
; Deliberately choose an index that is undefined
|
||||
define i32 @test_lane64_4xi32(<vscale x 4 x i32> %a) {
|
||||
; CHECK-LABEL: test_lane64_4xi32:
|
||||
define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: test_undef_lane_4xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, s0
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%b = extractelement <vscale x 4 x i32> %a, i32 undef
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i8 @extract_of_insert_undef_16xi8(i8 %a) {
|
||||
define i8 @extract_of_insert_undef_16xi8(i8 %a) #0 {
|
||||
; CHECK-LABEL: extract_of_insert_undef_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -199,7 +432,7 @@ define i8 @extract_of_insert_undef_16xi8(i8 %a) {
|
|||
ret i8 %c
|
||||
}
|
||||
|
||||
define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
|
||||
define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
|
||||
; CHECK-LABEL: extract0_of_insert0_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -208,7 +441,7 @@ define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
|
|||
ret i8 %d
|
||||
}
|
||||
|
||||
define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
|
||||
define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
|
||||
; CHECK-LABEL: extract64_of_insert64_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -217,18 +450,17 @@ define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
|
|||
ret i8 %d
|
||||
}
|
||||
|
||||
define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) {
|
||||
define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
|
||||
; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.b, z0.b[3]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: umov w0, v0.b[3]
|
||||
; CHECK-NEXT: ret
|
||||
%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
|
||||
%d = extractelement <vscale x 16 x i8> %c, i32 3
|
||||
ret i8 %d
|
||||
}
|
||||
|
||||
define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
|
||||
define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: test_lane0_zero_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
|
@ -240,7 +472,7 @@ define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
|
|||
; The DAG combiner should fold the extract of a splat to give element zero
|
||||
; of the splat, i.e. %x. If the index is beyond the end of the scalable
|
||||
; vector the result is undefined anyway.
|
||||
define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
|
||||
define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) #0 {
|
||||
; CHECK-LABEL: test_lanex_splat_2xi64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -249,3 +481,5 @@ define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
|
|||
%c = extractelement <vscale x 2 x i64> %b, i32 %y
|
||||
ret i64 %c
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
||||
|
|
|
@ -182,9 +182,8 @@ define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
|
|||
define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z1.b, b1
|
||||
; CHECK-NEXT: ptrue p0.b, vl1
|
||||
; CHECK-NEXT: fmov w8, s1
|
||||
; CHECK-NEXT: ptrue p0.b, vl1
|
||||
; CHECK-NEXT: mov z0.b, p0/m, w8
|
||||
; CHECK-NEXT: ret
|
||||
%c = extractelement <vscale x 16 x i8> %b, i32 0
|
||||
|
@ -212,14 +211,13 @@ define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %
|
|||
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z1.b, z1.b[1]
|
||||
; CHECK-NEXT: mov w8, #3
|
||||
; CHECK-NEXT: index z2.b, #0, #1
|
||||
; CHECK-NEXT: fmov w9, s1
|
||||
; CHECK-NEXT: mov z1.b, w8
|
||||
; CHECK-NEXT: mov w9, #3
|
||||
; CHECK-NEXT: umov w8, v1.b[1]
|
||||
; CHECK-NEXT: index z1.b, #0, #1
|
||||
; CHECK-NEXT: mov z2.b, w9
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b
|
||||
; CHECK-NEXT: mov z0.b, p0/m, w9
|
||||
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
|
||||
; CHECK-NEXT: mov z0.b, p0/m, w8
|
||||
; CHECK-NEXT: ret
|
||||
%c = extractelement <vscale x 16 x i8> %b, i32 1
|
||||
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
|
||||
|
|
|
@ -127,8 +127,7 @@ define i64 @split_extract_8i64_idx(<vscale x 8 x i64> %a, i32 %idx) {
|
|||
define i16 @promote_extract_4i16(<vscale x 4 x i16> %a) {
|
||||
; CHECK-LABEL: promote_extract_4i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.s, z0.s[1]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: mov w0, v0.s[1]
|
||||
; CHECK-NEXT: ret
|
||||
%ext = extractelement <vscale x 4 x i16> %a, i32 1
|
||||
ret i16 %ext
|
||||
|
@ -137,8 +136,7 @@ define i16 @promote_extract_4i16(<vscale x 4 x i16> %a) {
|
|||
define i8 @split_extract_32i8(<vscale x 32 x i8> %a) {
|
||||
; CHECK-LABEL: split_extract_32i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.b, z0.b[3]
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: umov w0, v0.b[3]
|
||||
; CHECK-NEXT: ret
|
||||
%ext = extractelement <vscale x 32 x i8> %a, i32 3
|
||||
ret i8 %ext
|
||||
|
|
Loading…
Reference in New Issue