forked from OSchip/llvm-project
[SVE][CodeGen] Restructure SVE fixed length tests to use update_llc_test_checks.
Most tests have been updated to make use of vscale_range to reduce the number of RUN lines. For the remaining RUN lines, the check prefixes have been updated to ensure the original expectation of the manual CHECK lines is maintained after update_llc_test_checks is run.
This commit is contained in:
parent
af6ec9200b
commit
fcd058acc9
File diff suppressed because it is too large
Load Diff
|
@ -1,31 +1,17 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) #0 {
|
define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v4i16:
|
; CHECK-LABEL: bitcast_v4i16:
|
||||||
; CHECK: ldr d0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str d0, [x1]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str d0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <4 x i16>, <4 x i16>* %a
|
%load = load volatile <4 x i16>, <4 x i16>* %a
|
||||||
%cast = bitcast <4 x i16> %load to <4 x half>
|
%cast = bitcast <4 x i16> %load to <4 x half>
|
||||||
store volatile <4 x half> %cast, <4 x half>* %b
|
store volatile <4 x half> %cast, <4 x half>* %b
|
||||||
|
@ -33,23 +19,25 @@ define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @bitcast_v8i16(<8 x i16> *%a, <8 x half>* %b) #0 {
|
define void @bitcast_v8i16(<8 x i16> *%a, <8 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v8i16:
|
; CHECK-LABEL: bitcast_v8i16:
|
||||||
; CHECK: ldr q0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str q0, [x1]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str q0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <8 x i16>, <8 x i16>* %a
|
%load = load volatile <8 x i16>, <8 x i16>* %a
|
||||||
%cast = bitcast <8 x i16> %load to <8 x half>
|
%cast = bitcast <8 x i16> %load to <8 x half>
|
||||||
store volatile <8 x half> %cast, <8 x half>* %b
|
store volatile <8 x half> %cast, <8 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) #0 {
|
define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v16i16:
|
; CHECK-LABEL: bitcast_v16i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: st1h { [[OP]].h }, [[PG]], [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <16 x i16>, <16 x i16>* %a
|
%load = load volatile <16 x i16>, <16 x i16>* %a
|
||||||
%cast = bitcast <16 x i16> %load to <16 x half>
|
%cast = bitcast <16 x i16> %load to <16 x half>
|
||||||
store volatile <16 x half> %cast, <16 x half>* %b
|
store volatile <16 x half> %cast, <16 x half>* %b
|
||||||
|
@ -57,35 +45,48 @@ define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v32i16(<32 x i16> *%a, <32 x half>* %b) #0 {
|
define void @bitcast_v32i16(<32 x i16> *%a, <32 x half>* %b) #0 {
|
||||||
; CHECK-LABEL: bitcast_v32i16:
|
; VBITS_GE_256-LABEL: bitcast_v32i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-NEXT: st1h { [[OP]].h }, [[PG]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: bitcast_v32i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%load = load volatile <32 x i16>, <32 x i16>* %a
|
%load = load volatile <32 x i16>, <32 x i16>* %a
|
||||||
%cast = bitcast <32 x i16> %load to <32 x half>
|
%cast = bitcast <32 x i16> %load to <32 x half>
|
||||||
store volatile <32 x half> %cast, <32 x half>* %b
|
store volatile <32 x half> %cast, <32 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v64i16(<64 x i16> *%a, <64 x half>* %b) #0 {
|
define void @bitcast_v64i16(<64 x i16> *%a, <64 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v64i16:
|
; CHECK-LABEL: bitcast_v64i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: st1h { [[OP]].h }, [[PG]], [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <64 x i16>, <64 x i16>* %a
|
%load = load volatile <64 x i16>, <64 x i16>* %a
|
||||||
%cast = bitcast <64 x i16> %load to <64 x half>
|
%cast = bitcast <64 x i16> %load to <64 x half>
|
||||||
store volatile <64 x half> %cast, <64 x half>* %b
|
store volatile <64 x half> %cast, <64 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) #0 {
|
define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v128i16:
|
; CHECK-LABEL: bitcast_v128i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: st1h { [[OP]].h }, [[PG]], [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <128 x i16>, <128 x i16>* %a
|
%load = load volatile <128 x i16>, <128 x i16>* %a
|
||||||
%cast = bitcast <128 x i16> %load to <128 x half>
|
%cast = bitcast <128 x i16> %load to <128 x half>
|
||||||
store volatile <128 x half> %cast, <128 x half>* %b
|
store volatile <128 x half> %cast, <128 x half>* %b
|
||||||
|
@ -93,11 +94,12 @@ define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) #0 {
|
define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v2i32:
|
; CHECK-LABEL: bitcast_v2i32:
|
||||||
; CHECK: ldr d0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str d0, [x1]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str d0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <2 x i32>, <2 x i32>* %a
|
%load = load volatile <2 x i32>, <2 x i32>* %a
|
||||||
%cast = bitcast <2 x i32> %load to <2 x float>
|
%cast = bitcast <2 x i32> %load to <2 x float>
|
||||||
store volatile <2 x float> %cast, <2 x float>* %b
|
store volatile <2 x float> %cast, <2 x float>* %b
|
||||||
|
@ -105,23 +107,25 @@ define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @bitcast_v4i32(<4 x i32> *%a, <4 x float>* %b) #0 {
|
define void @bitcast_v4i32(<4 x i32> *%a, <4 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v4i32:
|
; CHECK-LABEL: bitcast_v4i32:
|
||||||
; CHECK: ldr q0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str q0, [x1]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str q0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <4 x i32>, <4 x i32>* %a
|
%load = load volatile <4 x i32>, <4 x i32>* %a
|
||||||
%cast = bitcast <4 x i32> %load to <4 x float>
|
%cast = bitcast <4 x i32> %load to <4 x float>
|
||||||
store volatile <4 x float> %cast, <4 x float>* %b
|
store volatile <4 x float> %cast, <4 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) #0 {
|
define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v8i32:
|
; CHECK-LABEL: bitcast_v8i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: st1w { [[OP]].s }, [[PG]], [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <8 x i32>, <8 x i32>* %a
|
%load = load volatile <8 x i32>, <8 x i32>* %a
|
||||||
%cast = bitcast <8 x i32> %load to <8 x float>
|
%cast = bitcast <8 x i32> %load to <8 x float>
|
||||||
store volatile <8 x float> %cast, <8 x float>* %b
|
store volatile <8 x float> %cast, <8 x float>* %b
|
||||||
|
@ -129,35 +133,48 @@ define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v16i32(<16 x i32> *%a, <16 x float>* %b) #0 {
|
define void @bitcast_v16i32(<16 x i32> *%a, <16 x float>* %b) #0 {
|
||||||
; CHECK-LABEL: bitcast_v16i32:
|
; VBITS_GE_256-LABEL: bitcast_v16i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: st1w { [[OP]].s }, [[PG]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: bitcast_v16i32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%load = load volatile <16 x i32>, <16 x i32>* %a
|
%load = load volatile <16 x i32>, <16 x i32>* %a
|
||||||
%cast = bitcast <16 x i32> %load to <16 x float>
|
%cast = bitcast <16 x i32> %load to <16 x float>
|
||||||
store volatile <16 x float> %cast, <16 x float>* %b
|
store volatile <16 x float> %cast, <16 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v32i32(<32 x i32> *%a, <32 x float>* %b) #0 {
|
define void @bitcast_v32i32(<32 x i32> *%a, <32 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v32i32:
|
; CHECK-LABEL: bitcast_v32i32:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: st1w { [[OP]].s }, [[PG]], [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <32 x i32>, <32 x i32>* %a
|
%load = load volatile <32 x i32>, <32 x i32>* %a
|
||||||
%cast = bitcast <32 x i32> %load to <32 x float>
|
%cast = bitcast <32 x i32> %load to <32 x float>
|
||||||
store volatile <32 x float> %cast, <32 x float>* %b
|
store volatile <32 x float> %cast, <32 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) #0 {
|
define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v64i32:
|
; CHECK-LABEL: bitcast_v64i32:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: st1w { [[OP]].s }, [[PG]], [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <64 x i32>, <64 x i32>* %a
|
%load = load volatile <64 x i32>, <64 x i32>* %a
|
||||||
%cast = bitcast <64 x i32> %load to <64 x float>
|
%cast = bitcast <64 x i32> %load to <64 x float>
|
||||||
store volatile <64 x float> %cast, <64 x float>* %b
|
store volatile <64 x float> %cast, <64 x float>* %b
|
||||||
|
@ -165,11 +182,12 @@ define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) #0 {
|
define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v1i64:
|
; CHECK-LABEL: bitcast_v1i64:
|
||||||
; CHECK: ldr d0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str d0, [x1]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str d0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <1 x i64>, <1 x i64>* %a
|
%load = load volatile <1 x i64>, <1 x i64>* %a
|
||||||
%cast = bitcast <1 x i64> %load to <1 x double>
|
%cast = bitcast <1 x i64> %load to <1 x double>
|
||||||
store volatile <1 x double> %cast, <1 x double>* %b
|
store volatile <1 x double> %cast, <1 x double>* %b
|
||||||
|
@ -177,23 +195,25 @@ define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @bitcast_v2i64(<2 x i64> *%a, <2 x double>* %b) #0 {
|
define void @bitcast_v2i64(<2 x i64> *%a, <2 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v2i64:
|
; CHECK-LABEL: bitcast_v2i64:
|
||||||
; CHECK: ldr q0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: str q0, [x1]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: str q0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <2 x i64>, <2 x i64>* %a
|
%load = load volatile <2 x i64>, <2 x i64>* %a
|
||||||
%cast = bitcast <2 x i64> %load to <2 x double>
|
%cast = bitcast <2 x i64> %load to <2 x double>
|
||||||
store volatile <2 x double> %cast, <2 x double>* %b
|
store volatile <2 x double> %cast, <2 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) #0 {
|
define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v4i64:
|
; CHECK-LABEL: bitcast_v4i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: st1d { [[OP]].d }, [[PG]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <4 x i64>, <4 x i64>* %a
|
%load = load volatile <4 x i64>, <4 x i64>* %a
|
||||||
%cast = bitcast <4 x i64> %load to <4 x double>
|
%cast = bitcast <4 x i64> %load to <4 x double>
|
||||||
store volatile <4 x double> %cast, <4 x double>* %b
|
store volatile <4 x double> %cast, <4 x double>* %b
|
||||||
|
@ -201,35 +221,48 @@ define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v8i64(<8 x i64> *%a, <8 x double>* %b) #0 {
|
define void @bitcast_v8i64(<8 x i64> *%a, <8 x double>* %b) #0 {
|
||||||
; CHECK-LABEL: bitcast_v8i64:
|
; VBITS_GE_256-LABEL: bitcast_v8i64:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: st1d { [[OP]].d }, [[PG]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: bitcast_v8i64:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%load = load volatile <8 x i64>, <8 x i64>* %a
|
%load = load volatile <8 x i64>, <8 x i64>* %a
|
||||||
%cast = bitcast <8 x i64> %load to <8 x double>
|
%cast = bitcast <8 x i64> %load to <8 x double>
|
||||||
store volatile <8 x double> %cast, <8 x double>* %b
|
store volatile <8 x double> %cast, <8 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v16i64(<16 x i64> *%a, <16 x double>* %b) #0 {
|
define void @bitcast_v16i64(<16 x i64> *%a, <16 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v16i64:
|
; CHECK-LABEL: bitcast_v16i64:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[OP]].d }, [[PG]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <16 x i64>, <16 x i64>* %a
|
%load = load volatile <16 x i64>, <16 x i64>* %a
|
||||||
%cast = bitcast <16 x i64> %load to <16 x double>
|
%cast = bitcast <16 x i64> %load to <16 x double>
|
||||||
store volatile <16 x double> %cast, <16 x double>* %b
|
store volatile <16 x double> %cast, <16 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitcast_v32i64(<32 x i64> *%a, <32 x double>* %b) #0 {
|
define void @bitcast_v32i64(<32 x i64> *%a, <32 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitcast_v32i64:
|
; CHECK-LABEL: bitcast_v32i64:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[OP]].d }, [[PG]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load volatile <32 x i64>, <32 x i64>* %a
|
%load = load volatile <32 x i64>, <32 x i64>* %a
|
||||||
%cast = bitcast <32 x i64> %load to <32 x double>
|
%cast = bitcast <32 x i64> %load to <32 x double>
|
||||||
store volatile <32 x double> %cast, <32 x double>* %b
|
store volatile <32 x double> %cast, <32 x double>* %b
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,25 +1,10 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
define <4 x i32> @load_zext_v4i16i32(<4 x i16>* %ap) #0 {
|
define <4 x i32> @load_zext_v4i16i32(<4 x i16>* %ap) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: load_zext_v4i16i32:
|
; CHECK-LABEL: load_zext_v4i16i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -49,7 +34,7 @@ define <2 x i256> @load_zext_v2i64i256(<2 x i64>* %ap) #0 {
|
||||||
ret <2 x i256> %val
|
ret <2 x i256> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i32> @load_zext_v8i16i32(<8 x i16>* %ap) #0 {
|
define <8 x i32> @load_zext_v8i16i32(<8 x i16>* %ap) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: load_zext_v8i16i32:
|
; CHECK-LABEL: load_zext_v8i16i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -61,103 +46,43 @@ define <8 x i32> @load_zext_v8i16i32(<8 x i16>* %ap) #0 {
|
||||||
ret <8 x i32> %val
|
ret <8 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i32> @load_zext_v16i16i32(<16 x i16>* %ap) #0 {
|
define <16 x i32> @load_zext_v16i16i32(<16 x i16>* %ap) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v16i16i32:
|
; CHECK-LABEL: load_zext_v16i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: load_zext_v16i16i32:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
|
|
||||||
; Ensure sensible type legalisation
|
|
||||||
%a = load <16 x i16>, <16 x i16>* %ap
|
%a = load <16 x i16>, <16 x i16>* %ap
|
||||||
%val = zext <16 x i16> %a to <16 x i32>
|
%val = zext <16 x i16> %a to <16 x i32>
|
||||||
ret <16 x i32> %val
|
ret <16 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i32> @load_zext_v32i16i32(<32 x i16>* %ap) #0 {
|
define <32 x i32> @load_zext_v32i16i32(<32 x i16>* %ap) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v32i16i32:
|
; CHECK-LABEL: load_zext_v32i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: load_zext_v32i16i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <32 x i16>, <32 x i16>* %ap
|
%a = load <32 x i16>, <32 x i16>* %ap
|
||||||
%val = zext <32 x i16> %a to <32 x i32>
|
%val = zext <32 x i16> %a to <32 x i32>
|
||||||
ret <32 x i32> %val
|
ret <32 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <64 x i32> @load_zext_v64i16i32(<64 x i16>* %ap) #0 {
|
define <64 x i32> @load_zext_v64i16i32(<64 x i16>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v64i16i32:
|
; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; VBITS_GE_1024-NEXT: mov x9, #32
|
||||||
; VBITS_GE_256-NEXT: mov x11, #48
|
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
; VBITS_GE_1024-NEXT: uunpklo z1.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1]
|
; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x11, lsl #1]
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: uunpklo z4.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z5.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z6.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #56
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z7.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
|
; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -170,7 +95,7 @@ define <64 x i32> @load_zext_v64i16i32(<64 x i16>* %ap) #0 {
|
||||||
ret <64 x i32> %val
|
ret <64 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @load_sext_v4i16i32(<4 x i16>* %ap) #0 {
|
define <4 x i32> @load_sext_v4i16i32(<4 x i16>* %ap) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: load_sext_v4i16i32:
|
; CHECK-LABEL: load_sext_v4i16i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -181,7 +106,7 @@ define <4 x i32> @load_sext_v4i16i32(<4 x i16>* %ap) #0 {
|
||||||
ret <4 x i32> %val
|
ret <4 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i32> @load_sext_v8i16i32(<8 x i16>* %ap) #0 {
|
define <8 x i32> @load_sext_v8i16i32(<8 x i16>* %ap) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: load_sext_v8i16i32:
|
; CHECK-LABEL: load_sext_v8i16i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -193,103 +118,43 @@ define <8 x i32> @load_sext_v8i16i32(<8 x i16>* %ap) #0 {
|
||||||
ret <8 x i32> %val
|
ret <8 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i32> @load_sext_v16i16i32(<16 x i16>* %ap) #0 {
|
define <16 x i32> @load_sext_v16i16i32(<16 x i16>* %ap) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v16i16i32:
|
; CHECK-LABEL: load_sext_v16i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: load_sext_v16i16i32:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_512-NEXT: ld1sh { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
|
|
||||||
; Ensure sensible type legalistaion
|
|
||||||
%a = load <16 x i16>, <16 x i16>* %ap
|
%a = load <16 x i16>, <16 x i16>* %ap
|
||||||
%val = sext <16 x i16> %a to <16 x i32>
|
%val = sext <16 x i16> %a to <16 x i32>
|
||||||
ret <16 x i32> %val
|
ret <16 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i32> @load_sext_v32i16i32(<32 x i16>* %ap) #0 {
|
define <32 x i32> @load_sext_v32i16i32(<32 x i16>* %ap) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v32i16i32:
|
; CHECK-LABEL: load_sext_v32i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: load_sext_v32i16i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1sh { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <32 x i16>, <32 x i16>* %ap
|
%a = load <32 x i16>, <32 x i16>* %ap
|
||||||
%val = sext <32 x i16> %a to <32 x i32>
|
%val = sext <32 x i16> %a to <32 x i32>
|
||||||
ret <32 x i32> %val
|
ret <32 x i32> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
define <64 x i32> @load_sext_v64i16i32(<64 x i16>* %ap) #0 {
|
define <64 x i32> @load_sext_v64i16i32(<64 x i16>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v64i16i32:
|
; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; VBITS_GE_1024-NEXT: mov x9, #32
|
||||||
; VBITS_GE_256-NEXT: mov x11, #48
|
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
; VBITS_GE_1024-NEXT: sunpklo z1.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1]
|
; VBITS_GE_1024-NEXT: sunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x11, lsl #1]
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: sunpklo z4.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z5.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z6.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #56
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z7.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
|
; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -303,52 +168,22 @@ define <64 x i32> @load_sext_v64i16i32(<64 x i16>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_zext_v32i8i64(<32 x i8>* %ap) #0 {
|
define <32 x i64> @load_zext_v32i8i64(<32 x i8>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v32i8i64:
|
; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; VBITS_GE_1024-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; VBITS_GE_1024-NEXT: uunpklo z1.h, z0.b
|
||||||
; VBITS_GE_256-NEXT: ushll2 v2.8h, v0.16b, #0
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #16
|
||||||
; VBITS_GE_256-NEXT: ushll v1.8h, v0.8b, #0
|
; VBITS_GE_1024-NEXT: uunpklo z0.h, z0.b
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v2.16b, v2.16b, #8
|
; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
|
; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s
|
||||||
; VBITS_GE_256-NEXT: ushll2 v4.8h, v0.16b, #0
|
; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z2.s
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: ext v4.16b, v4.16b, v4.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext v2.16b, v1.16b, v1.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.s, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
|
; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -362,52 +197,22 @@ define <32 x i64> @load_zext_v32i8i64(<32 x i8>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_sext_v32i8i64(<32 x i8>* %ap) #0 {
|
define <32 x i64> @load_sext_v32i8i64(<32 x i8>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v32i8i64:
|
; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; VBITS_GE_1024-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; VBITS_GE_1024-NEXT: sunpklo z1.h, z0.b
|
||||||
; VBITS_GE_256-NEXT: sshll2 v2.8h, v0.16b, #0
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #16
|
||||||
; VBITS_GE_256-NEXT: sshll v1.8h, v0.8b, #0
|
; VBITS_GE_1024-NEXT: sunpklo z0.h, z0.b
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
; VBITS_GE_1024-NEXT: sunpklo z1.s, z1.h
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v2.16b, v2.16b, #8
|
; VBITS_GE_1024-NEXT: sunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
|
; VBITS_GE_1024-NEXT: sunpklo z1.d, z1.s
|
||||||
; VBITS_GE_256-NEXT: sshll2 v4.8h, v0.16b, #0
|
; VBITS_GE_1024-NEXT: sunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: ext v4.16b, v4.16b, v4.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext v2.16b, v1.16b, v1.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.s, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
|
; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -421,50 +226,20 @@ define <32 x i64> @load_sext_v32i8i64(<32 x i8>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_zext_v32i16i64(<32 x i16>* %ap) #0 {
|
define <32 x i64> @load_zext_v32i16i64(<32 x i16>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v32i16i64:
|
; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: uunpklo z1.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #32
|
||||||
; VBITS_GE_256-NEXT: ext v2.16b, v0.16b, v0.16b, #8
|
; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
|
; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
|
; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: ext v5.16b, v1.16b, v1.16b, #8
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3]
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z4.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: ext v6.16b, v1.16b, v1.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z5.s, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z5.d, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #28
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #12
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.s, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.d, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
|
; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -478,50 +253,20 @@ define <32 x i64> @load_zext_v32i16i64(<32 x i16>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_sext_v32i16i64(<32 x i16>* %ap) #0 {
|
define <32 x i64> @load_sext_v32i16i64(<32 x i16>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v32i16i64:
|
; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: sunpklo z1.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #32
|
||||||
; VBITS_GE_256-NEXT: ext v2.16b, v0.16b, v0.16b, #8
|
; VBITS_GE_1024-NEXT: sunpklo z0.s, z0.h
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h
|
; VBITS_GE_1024-NEXT: sunpklo z1.d, z1.s
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.d, z3.s
|
; VBITS_GE_1024-NEXT: sunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: ext v5.16b, v1.16b, v1.16b, #8
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3]
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z4.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: ext v6.16b, v1.16b, v1.16b, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z5.s, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z5.d, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.s, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #28
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #12
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.s, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.d, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
|
; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -535,42 +280,18 @@ define <32 x i64> @load_sext_v32i16i64(<32 x i16>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_zext_v32i32i64(<32 x i32>* %ap) #0 {
|
define <32 x i64> @load_zext_v32i32i64(<32 x i32>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_zext_v32i32i64:
|
; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: mov x11, #24
|
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
; VBITS_GE_1024-NEXT: uunpklo z1.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: uunpklo z4.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z5.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z6.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x8, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #4
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z7.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
|
; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
@ -584,42 +305,18 @@ define <32 x i64> @load_zext_v32i32i64(<32 x i32>* %ap) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i64> @load_sext_v32i32i64(<32 x i32>* %ap) #0 {
|
define <32 x i64> @load_sext_v32i32i64(<32 x i32>* %ap) #0 {
|
||||||
; VBITS_GE_256-LABEL: load_sext_v32i32i64:
|
; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_1024: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; VBITS_GE_1024-NEXT: mov x9, #16
|
||||||
; VBITS_GE_256-NEXT: mov x11, #24
|
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
; VBITS_GE_1024-NEXT: sunpklo z1.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
; VBITS_GE_1024-NEXT: sunpklo z0.d, z0.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
|
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_1024-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: sunpklo z4.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z5.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z6.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x8, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #4
|
|
||||||
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z7.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: sunpklo z3.d, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x8, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
|
; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
|
|
@ -1,28 +1,14 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; i8
|
; i8
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) #0 {
|
define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v8i8:
|
; CHECK-LABEL: extract_subvector_v8i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
|
; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
|
||||||
|
@ -32,7 +18,7 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) #0 {
|
define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v16i8:
|
; CHECK-LABEL: extract_subvector_v16i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -42,7 +28,7 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) #0 {
|
||||||
ret <8 x i8> %ret
|
ret <8 x i8> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v32i8(<32 x i8>* %a, <16 x i8>* %b) #0 {
|
define void @extract_subvector_v32i8(<32 x i8>* %a, <16 x i8>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v32i8:
|
; CHECK-LABEL: extract_subvector_v32i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
|
@ -79,62 +65,30 @@ define void @extract_subvector_v64i8(<64 x i8>* %a, <32 x i8>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v128i8(<128 x i8>* %a, <64 x i8>* %b) #0 {
|
define void @extract_subvector_v128i8(<128 x i8>* %a, <64 x i8>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v128i8:
|
; CHECK-LABEL: extract_subvector_v128i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #64
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_256-NEXT: mov w9, #96
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ptrue p0.b, vl64
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0, x9]
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov w8, #32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x1, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v128i8:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.b, vl128
|
|
||||||
; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.b, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <128 x i8>, <128 x i8>* %a
|
%op = load <128 x i8>, <128 x i8>* %a
|
||||||
%ret = call <64 x i8> @llvm.experimental.vector.extract.v64i8.v128i8(<128 x i8> %op, i64 64)
|
%ret = call <64 x i8> @llvm.experimental.vector.extract.v64i8.v128i8(<128 x i8> %op, i64 64)
|
||||||
store <64 x i8> %ret, <64 x i8>* %b
|
store <64 x i8> %ret, <64 x i8>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) #0 {
|
define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v256i8:
|
; CHECK-LABEL: extract_subvector_v256i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #128
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_256-NEXT: mov w9, #160
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov w10, #224
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_256-NEXT: mov w11, #192
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0, x9]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0, x10]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x0, x11]
|
|
||||||
; VBITS_GE_256-NEXT: mov w8, #64
|
|
||||||
; VBITS_GE_256-NEXT: mov w9, #96
|
|
||||||
; VBITS_GE_256-NEXT: mov w10, #32
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z3.b }, p0, [x1, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x1, x9]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x1, x10]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v256i8:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl256
|
|
||||||
; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <256 x i8>, <256 x i8>* %a
|
%op = load <256 x i8>, <256 x i8>* %a
|
||||||
%ret = call <128 x i8> @llvm.experimental.vector.extract.v128i8.v256i8(<256 x i8> %op, i64 128)
|
%ret = call <128 x i8> @llvm.experimental.vector.extract.v128i8.v256i8(<256 x i8> %op, i64 128)
|
||||||
store <128 x i8> %ret, <128 x i8>* %b
|
store <128 x i8> %ret, <128 x i8>* %b
|
||||||
|
@ -144,7 +98,7 @@ define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) #0 {
|
||||||
; i16
|
; i16
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) #0 {
|
define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4i16:
|
; CHECK-LABEL: extract_subvector_v4i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -159,7 +113,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) #0 {
|
define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v8i16:
|
; CHECK-LABEL: extract_subvector_v8i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -169,7 +123,7 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) #0 {
|
||||||
ret <4 x i16> %ret
|
ret <4 x i16> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v16i16(<16 x i16>* %a, <8 x i16>* %b) #0 {
|
define void @extract_subvector_v16i16(<16 x i16>* %a, <8 x i16>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v16i16:
|
; CHECK-LABEL: extract_subvector_v16i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -206,62 +160,30 @@ define void @extract_subvector_v32i16(<32 x i16>* %a, <16 x i16>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v64i16(<64 x i16>* %a, <32 x i16>* %b) #0 {
|
define void @extract_subvector_v64i16(<64 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v64i16:
|
; CHECK-LABEL: extract_subvector_v64i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v64i16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <64 x i16>, <64 x i16>* %a
|
%op = load <64 x i16>, <64 x i16>* %a
|
||||||
%ret = call <32 x i16> @llvm.experimental.vector.extract.v32i16.v64i16(<64 x i16> %op, i64 32)
|
%ret = call <32 x i16> @llvm.experimental.vector.extract.v32i16.v64i16(<64 x i16> %op, i64 32)
|
||||||
store <32 x i16> %ret, <32 x i16>* %b
|
store <32 x i16> %ret, <32 x i16>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v128i16(<128 x i16>* %a, <64 x i16>* %b) #0 {
|
define void @extract_subvector_v128i16(<128 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v128i16:
|
; CHECK-LABEL: extract_subvector_v128i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #64
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #80
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #112
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x11, #96
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v128i16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <128 x i16>, <128 x i16>* %a
|
%op = load <128 x i16>, <128 x i16>* %a
|
||||||
%ret = call <64 x i16> @llvm.experimental.vector.extract.v64i16.v128i16(<128 x i16> %op, i64 64)
|
%ret = call <64 x i16> @llvm.experimental.vector.extract.v64i16.v128i16(<128 x i16> %op, i64 64)
|
||||||
store <64 x i16> %ret, <64 x i16>* %b
|
store <64 x i16> %ret, <64 x i16>* %b
|
||||||
|
@ -271,7 +193,7 @@ define void @extract_subvector_v128i16(<128 x i16>* %a, <64 x i16>* %b) #0 {
|
||||||
; i32
|
; i32
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) #0 {
|
define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v2i32:
|
; CHECK-LABEL: extract_subvector_v2i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -282,7 +204,7 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) #0 {
|
define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4i32:
|
; CHECK-LABEL: extract_subvector_v4i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -292,7 +214,7 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) #0 {
|
||||||
ret <2 x i32> %ret
|
ret <2 x i32> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v8i32(<8 x i32>* %a, <4 x i32>* %b) #0 {
|
define void @extract_subvector_v8i32(<8 x i32>* %a, <4 x i32>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v8i32:
|
; CHECK-LABEL: extract_subvector_v8i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -329,62 +251,30 @@ define void @extract_subvector_v16i32(<16 x i32>* %a, <8 x i32>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v32i32(<32 x i32>* %a, <16 x i32>* %b) #0 {
|
define void @extract_subvector_v32i32(<32 x i32>* %a, <16 x i32>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v32i32:
|
; CHECK-LABEL: extract_subvector_v32i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v32i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <32 x i32>, <32 x i32>* %a
|
%op = load <32 x i32>, <32 x i32>* %a
|
||||||
%ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.v32i32(<32 x i32> %op, i64 16)
|
%ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.v32i32(<32 x i32> %op, i64 16)
|
||||||
store <16 x i32> %ret, <16 x i32>* %b
|
store <16 x i32> %ret, <16 x i32>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v64i32(<64 x i32>* %a, <32 x i32>* %b) #0 {
|
define void @extract_subvector_v64i32(<64 x i32>* %a, <32 x i32>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v64i32:
|
; CHECK-LABEL: extract_subvector_v64i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #40
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #56
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x11, #48
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v64i32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <64 x i32>, <64 x i32>* %a
|
%op = load <64 x i32>, <64 x i32>* %a
|
||||||
%ret = call <32 x i32> @llvm.experimental.vector.extract.v32i32.v64i32(<64 x i32> %op, i64 32)
|
%ret = call <32 x i32> @llvm.experimental.vector.extract.v32i32.v64i32(<64 x i32> %op, i64 32)
|
||||||
store <32 x i32> %ret, <32 x i32>* %b
|
store <32 x i32> %ret, <32 x i32>* %b
|
||||||
|
@ -394,7 +284,7 @@ define void @extract_subvector_v64i32(<64 x i32>* %a, <32 x i32>* %b) #0 {
|
||||||
; i64
|
; i64
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) #0 {
|
define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v2i64:
|
; CHECK-LABEL: extract_subvector_v2i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -404,7 +294,7 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) #0 {
|
||||||
ret <1 x i64> %ret
|
ret <1 x i64> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v4i64(<4 x i64>* %a, <2 x i64>* %b) #0 {
|
define void @extract_subvector_v4i64(<4 x i64>* %a, <2 x i64>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4i64:
|
; CHECK-LABEL: extract_subvector_v4i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -418,23 +308,14 @@ define void @extract_subvector_v4i64(<4 x i64>* %a, <2 x i64>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v8i64(<8 x i64>* %a, <4 x i64>* %b) #0 {
|
define void @extract_subvector_v8i64(<8 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v8i64:
|
; CHECK-LABEL: extract_subvector_v8i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: mov x8, #4
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: extract_subvector_v8i64:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
|
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%op = load <8 x i64>, <8 x i64>* %a
|
%op = load <8 x i64>, <8 x i64>* %a
|
||||||
%ret = call <4 x i64> @llvm.experimental.vector.extract.v4i64.v8i64(<8 x i64> %op, i64 4)
|
%ret = call <4 x i64> @llvm.experimental.vector.extract.v4i64.v8i64(<8 x i64> %op, i64 4)
|
||||||
store <4 x i64> %ret, <4 x i64>* %b
|
store <4 x i64> %ret, <4 x i64>* %b
|
||||||
|
@ -453,50 +334,20 @@ define void @extract_subvector_v16i64(<16 x i64>* %a, <8 x i64>* %b) #0 {
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v16i64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <16 x i64>, <16 x i64>* %a
|
%op = load <16 x i64>, <16 x i64>* %a
|
||||||
%ret = call <8 x i64> @llvm.experimental.vector.extract.v8i64.v16i64(<16 x i64> %op, i64 8)
|
%ret = call <8 x i64> @llvm.experimental.vector.extract.v8i64.v16i64(<16 x i64> %op, i64 8)
|
||||||
store <8 x i64> %ret, <8 x i64>* %b
|
store <8 x i64> %ret, <8 x i64>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v32i64(<32 x i64>* %a, <16 x i64>* %b) #0 {
|
define void @extract_subvector_v32i64(<32 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v32i64:
|
; CHECK-LABEL: extract_subvector_v32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: mov x8, #16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #24
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v32i64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <32 x i64>, <32 x i64>* %a
|
%op = load <32 x i64>, <32 x i64>* %a
|
||||||
%ret = call <16 x i64> @llvm.experimental.vector.extract.v16i64.v32i64(<32 x i64> %op, i64 16)
|
%ret = call <16 x i64> @llvm.experimental.vector.extract.v16i64.v32i64(<32 x i64> %op, i64 16)
|
||||||
store <16 x i64> %ret, <16 x i64>* %b
|
store <16 x i64> %ret, <16 x i64>* %b
|
||||||
|
@ -506,7 +357,7 @@ define void @extract_subvector_v32i64(<32 x i64>* %a, <16 x i64>* %b) #0 {
|
||||||
; f16
|
; f16
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x half> @extract_subvector_v4f16(<4 x half> %op) #0 {
|
define <2 x half> @extract_subvector_v4f16(<4 x half> %op) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4f16:
|
; CHECK-LABEL: extract_subvector_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -517,7 +368,7 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x half> @extract_subvector_v8f16(<8 x half> %op) #0 {
|
define <4 x half> @extract_subvector_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v8f16:
|
; CHECK-LABEL: extract_subvector_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -527,7 +378,7 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) #0 {
|
||||||
ret <4 x half> %ret
|
ret <4 x half> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v16f16(<16 x half>* %a, <8 x half>* %b) #0 {
|
define void @extract_subvector_v16f16(<16 x half>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v16f16:
|
; CHECK-LABEL: extract_subvector_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -564,62 +415,30 @@ define void @extract_subvector_v32f16(<32 x half>* %a, <16 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v64f16(<64 x half>* %a, <32 x half>* %b) #0 {
|
define void @extract_subvector_v64f16(<64 x half>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v64f16:
|
; CHECK-LABEL: extract_subvector_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <64 x half>, <64 x half>* %a
|
%op = load <64 x half>, <64 x half>* %a
|
||||||
%ret = call <32 x half> @llvm.experimental.vector.extract.v32f16.v64f16(<64 x half> %op, i64 32)
|
%ret = call <32 x half> @llvm.experimental.vector.extract.v32f16.v64f16(<64 x half> %op, i64 32)
|
||||||
store <32 x half> %ret, <32 x half>* %b
|
store <32 x half> %ret, <32 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v128f16(<128 x half>* %a, <64 x half>* %b) #0 {
|
define void @extract_subvector_v128f16(<128 x half>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v128f16:
|
; CHECK-LABEL: extract_subvector_v128f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #64
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #80
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #112
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x11, #96
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v128f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <128 x half>, <128 x half>* %a
|
%op = load <128 x half>, <128 x half>* %a
|
||||||
%ret = call <64 x half> @llvm.experimental.vector.extract.v64f16.v128f16(<128 x half> %op, i64 64)
|
%ret = call <64 x half> @llvm.experimental.vector.extract.v64f16.v128f16(<128 x half> %op, i64 64)
|
||||||
store <64 x half> %ret, <64 x half>* %b
|
store <64 x half> %ret, <64 x half>* %b
|
||||||
|
@ -629,7 +448,7 @@ define void @extract_subvector_v128f16(<128 x half>* %a, <64 x half>* %b) #0 {
|
||||||
; f32
|
; f32
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x float> @extract_subvector_v2f32(<2 x float> %op) #0 {
|
define <1 x float> @extract_subvector_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v2f32:
|
; CHECK-LABEL: extract_subvector_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -640,7 +459,7 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x float> @extract_subvector_v4f32(<4 x float> %op) #0 {
|
define <2 x float> @extract_subvector_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4f32:
|
; CHECK-LABEL: extract_subvector_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -650,7 +469,7 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) #0 {
|
||||||
ret <2 x float> %ret
|
ret <2 x float> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v8f32(<8 x float>* %a, <4 x float>* %b) #0 {
|
define void @extract_subvector_v8f32(<8 x float>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v8f32:
|
; CHECK-LABEL: extract_subvector_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -687,62 +506,30 @@ define void @extract_subvector_v16f32(<16 x float>* %a, <8 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v32f32(<32 x float>* %a, <16 x float>* %b) #0 {
|
define void @extract_subvector_v32f32(<32 x float>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v32f32:
|
; CHECK-LABEL: extract_subvector_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <32 x float>, <32 x float>* %a
|
%op = load <32 x float>, <32 x float>* %a
|
||||||
%ret = call <16 x float> @llvm.experimental.vector.extract.v16f32.v32f32(<32 x float> %op, i64 16)
|
%ret = call <16 x float> @llvm.experimental.vector.extract.v16f32.v32f32(<32 x float> %op, i64 16)
|
||||||
store <16 x float> %ret, <16 x float>* %b
|
store <16 x float> %ret, <16 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v64f32(<64 x float>* %a, <32 x float>* %b) #0 {
|
define void @extract_subvector_v64f32(<64 x float>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v64f32:
|
; CHECK-LABEL: extract_subvector_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #40
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #56
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x11, #48
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <64 x float>, <64 x float>* %a
|
%op = load <64 x float>, <64 x float>* %a
|
||||||
%ret = call <32 x float> @llvm.experimental.vector.extract.v32f32.v64f32(<64 x float> %op, i64 32)
|
%ret = call <32 x float> @llvm.experimental.vector.extract.v32f32.v64f32(<64 x float> %op, i64 32)
|
||||||
store <32 x float> %ret, <32 x float>* %b
|
store <32 x float> %ret, <32 x float>* %b
|
||||||
|
@ -752,7 +539,7 @@ define void @extract_subvector_v64f32(<64 x float>* %a, <32 x float>* %b) #0 {
|
||||||
; f64
|
; f64
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <1 x double> @extract_subvector_v2f64(<2 x double> %op) #0 {
|
define <1 x double> @extract_subvector_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v2f64:
|
; CHECK-LABEL: extract_subvector_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||||
|
@ -762,7 +549,7 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) #0 {
|
||||||
ret <1 x double> %ret
|
ret <1 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v4f64(<4 x double>* %a, <2 x double>* %b) #0 {
|
define void @extract_subvector_v4f64(<4 x double>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extract_subvector_v4f64:
|
; CHECK-LABEL: extract_subvector_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -799,62 +586,30 @@ define void @extract_subvector_v8f64(<8 x double>* %a, <4 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v16f64(<16 x double>* %a, <8 x double>* %b) #0 {
|
define void @extract_subvector_v16f64(<16 x double>* %a, <8 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v16f64:
|
; CHECK-LABEL: extract_subvector_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: extract_subvector_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_1024-NEXT: ext z0.b, z0.b, z0.b, #64
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op = load <16 x double>, <16 x double>* %a
|
%op = load <16 x double>, <16 x double>* %a
|
||||||
%ret = call <8 x double> @llvm.experimental.vector.extract.v8f64.v16f64(<16 x double> %op, i64 8)
|
%ret = call <8 x double> @llvm.experimental.vector.extract.v8f64.v16f64(<16 x double> %op, i64 8)
|
||||||
store <8 x double> %ret, <8 x double>* %b
|
store <8 x double> %ret, <8 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @extract_subvector_v32f64(<32 x double>* %a, <16 x double>* %b) #0 {
|
define void @extract_subvector_v32f64(<32 x double>* %a, <16 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: extract_subvector_v32f64:
|
; CHECK-LABEL: extract_subvector_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #20
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #28
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x11, #24
|
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: extract_subvector_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_2048-NEXT: ext z0.b, z0.b, z0.b, #128
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op = load <32 x double>, <32 x double>* %a
|
%op = load <32 x double>, <32 x double>* %a
|
||||||
%ret = call <16 x double> @llvm.experimental.vector.extract.v16f64.v32f64(<32 x double> %op, i64 16)
|
%ret = call <16 x double> @llvm.experimental.vector.extract.v16f64.v32f64(<32 x double> %op, i64 16)
|
||||||
store <16 x double> %ret, <16 x double>* %b
|
store <16 x double> %ret, <16 x double>* %b
|
||||||
|
|
|
@ -1,221 +1,259 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
;
|
;
|
||||||
; extractelement
|
; extractelement
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define half @extractelement_v4f16(<4 x half> %op1) #0 {
|
define half @extractelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v4f16:
|
; CHECK-LABEL: extractelement_v4f16:
|
||||||
; CHECK: mov h0, v0.h[3]
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
; CHECK-NEXT: mov h0, v0.h[3]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <4 x half> %op1, i64 3
|
%r = extractelement <4 x half> %op1, i64 3
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define half @extractelement_v8f16(<8 x half> %op1) #0 {
|
define half @extractelement_v8f16(<8 x half> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v8f16:
|
; CHECK-LABEL: extractelement_v8f16:
|
||||||
; CHECK: mov h0, v0.h[7]
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov h0, v0.h[7]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <8 x half> %op1, i64 7
|
%r = extractelement <8 x half> %op1, i64 7
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define half @extractelement_v16f16(<16 x half>* %a) #0 {
|
define half @extractelement_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v16f16:
|
; CHECK-LABEL: extractelement_v16f16:
|
||||||
; VBITS_GE_256: ptrue p0.h, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, z0.h[15]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: mov z0.h, z0.h[15]
|
||||||
|
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x half>, <16 x half>* %a
|
%op1 = load <16 x half>, <16 x half>* %a
|
||||||
%r = extractelement <16 x half> %op1, i64 15
|
%r = extractelement <16 x half> %op1, i64 15
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define half @extractelement_v32f16(<32 x half>* %a) #0 {
|
define half @extractelement_v32f16(<32 x half>* %a) #0 {
|
||||||
; CHECK-LABEL: extractelement_v32f16:
|
; VBITS_GE_256-LABEL: extractelement_v32f16:
|
||||||
; VBITS_GE_512: ptrue p0.h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.h, z0.h[15]
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: extractelement_v32f16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: mov z0.h, z0.h[31]
|
; VBITS_GE_512-NEXT: mov z0.h, z0.h[31]
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <32 x half>, <32 x half>* %a
|
%op1 = load <32 x half>, <32 x half>* %a
|
||||||
%r = extractelement <32 x half> %op1, i64 31
|
%r = extractelement <32 x half> %op1, i64 31
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define half @extractelement_v64f16(<64 x half>* %a) #0 {
|
define half @extractelement_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v64f16:
|
; CHECK-LABEL: extractelement_v64f16:
|
||||||
; VBITS_GE_1024: ptrue p0.h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w8, #63
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #63
|
||||||
; VBITS_GE_1024-NEXT: whilels p0.h, xzr, x8
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: lastb h0, p0, z0.h
|
; CHECK-NEXT: whilels p0.h, xzr, x8
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: lastb h0, p0, z0.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x half>, <64 x half>* %a
|
%op1 = load <64 x half>, <64 x half>* %a
|
||||||
%r = extractelement <64 x half> %op1, i64 63
|
%r = extractelement <64 x half> %op1, i64 63
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define half @extractelement_v128f16(<128 x half>* %a) #0 {
|
define half @extractelement_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v128f16:
|
; CHECK-LABEL: extractelement_v128f16:
|
||||||
; VBITS_GE_2048: ptrue p0.h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w8, #127
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #127
|
||||||
; VBITS_GE_2048-NEXT: whilels p0.h, xzr, x8
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: lastb h0, p0, z0.h
|
; CHECK-NEXT: whilels p0.h, xzr, x8
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: lastb h0, p0, z0.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <128 x half>, <128 x half>* %a
|
%op1 = load <128 x half>, <128 x half>* %a
|
||||||
%r = extractelement <128 x half> %op1, i64 127
|
%r = extractelement <128 x half> %op1, i64 127
|
||||||
ret half %r
|
ret half %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define float @extractelement_v2f32(<2 x float> %op1) #0 {
|
define float @extractelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v2f32:
|
; CHECK-LABEL: extractelement_v2f32:
|
||||||
; CHECK: mov s0, v0.s[1]
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
; CHECK-NEXT: mov s0, v0.s[1]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <2 x float> %op1, i64 1
|
%r = extractelement <2 x float> %op1, i64 1
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define float @extractelement_v4f32(<4 x float> %op1) #0 {
|
define float @extractelement_v4f32(<4 x float> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v4f32:
|
; CHECK-LABEL: extractelement_v4f32:
|
||||||
; CHECK: mov s0, v0.s[3]
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov s0, v0.s[3]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <4 x float> %op1, i64 3
|
%r = extractelement <4 x float> %op1, i64 3
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @extractelement_v8f32(<8 x float>* %a) #0 {
|
define float @extractelement_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v8f32:
|
; CHECK-LABEL: extractelement_v8f32:
|
||||||
; VBITS_GE_256: ptrue p0.s, vl8
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, z0.s[7]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: mov z0.s, z0.s[7]
|
||||||
|
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <8 x float>, <8 x float>* %a
|
%op1 = load <8 x float>, <8 x float>* %a
|
||||||
%r = extractelement <8 x float> %op1, i64 7
|
%r = extractelement <8 x float> %op1, i64 7
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @extractelement_v16f32(<16 x float>* %a) #0 {
|
define float @extractelement_v16f32(<16 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: extractelement_v16f32:
|
; VBITS_GE_256-LABEL: extractelement_v16f32:
|
||||||
; VBITS_GE_512: ptrue p0.s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.s, z0.s[7]
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: extractelement_v16f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: mov z0.s, z0.s[15]
|
; VBITS_GE_512-NEXT: mov z0.s, z0.s[15]
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <16 x float>, <16 x float>* %a
|
%op1 = load <16 x float>, <16 x float>* %a
|
||||||
%r = extractelement <16 x float> %op1, i64 15
|
%r = extractelement <16 x float> %op1, i64 15
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @extractelement_v32f32(<32 x float>* %a) #0 {
|
define float @extractelement_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v32f32:
|
; CHECK-LABEL: extractelement_v32f32:
|
||||||
; VBITS_GE_1024: ptrue p0.s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w8, #31
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #31
|
||||||
; VBITS_GE_1024-NEXT: whilels p0.s, xzr, x8
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: lastb s0, p0, z0.s
|
; CHECK-NEXT: whilels p0.s, xzr, x8
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: lastb s0, p0, z0.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%r = extractelement <32 x float> %op1, i64 31
|
%r = extractelement <32 x float> %op1, i64 31
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @extractelement_v64f32(<64 x float>* %a) #0 {
|
define float @extractelement_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v64f32:
|
; CHECK-LABEL: extractelement_v64f32:
|
||||||
; VBITS_GE_2048: ptrue p0.s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w8, #63
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #63
|
||||||
; VBITS_GE_2048-NEXT: whilels p0.s, xzr, x8
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: lastb s0, p0, z0.s
|
; CHECK-NEXT: whilels p0.s, xzr, x8
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: lastb s0, p0, z0.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x float>, <64 x float>* %a
|
%op1 = load <64 x float>, <64 x float>* %a
|
||||||
%r = extractelement <64 x float> %op1, i64 63
|
%r = extractelement <64 x float> %op1, i64 63
|
||||||
ret float %r
|
ret float %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define double @extractelement_v1f64(<1 x double> %op1) #0 {
|
define double @extractelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v1f64:
|
; CHECK-LABEL: extractelement_v1f64:
|
||||||
; CHECK: ret
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <1 x double> %op1, i64 0
|
%r = extractelement <1 x double> %op1, i64 0
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define double @extractelement_v2f64(<2 x double> %op1) #0 {
|
define double @extractelement_v2f64(<2 x double> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v2f64:
|
; CHECK-LABEL: extractelement_v2f64:
|
||||||
; CHECK: mov d0, v0.d[1]
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: mov d0, v0.d[1]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = extractelement <2 x double> %op1, i64 1
|
%r = extractelement <2 x double> %op1, i64 1
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @extractelement_v4f64(<4 x double>* %a) #0 {
|
define double @extractelement_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v4f64:
|
; CHECK-LABEL: extractelement_v4f64:
|
||||||
; VBITS_GE_256: ptrue p0.d, vl4
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, z0.d[3]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: mov z0.d, z0.d[3]
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <4 x double>, <4 x double>* %a
|
%op1 = load <4 x double>, <4 x double>* %a
|
||||||
%r = extractelement <4 x double> %op1, i64 3
|
%r = extractelement <4 x double> %op1, i64 3
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @extractelement_v8f64(<8 x double>* %a) #0 {
|
define double @extractelement_v8f64(<8 x double>* %a) #0 {
|
||||||
; CHECK-LABEL: extractelement_v8f64:
|
; VBITS_GE_256-LABEL: extractelement_v8f64:
|
||||||
; VBITS_GE_512: ptrue p0.d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.d, z0.d[3]
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: extractelement_v8f64:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: mov z0.d, z0.d[7]
|
; VBITS_GE_512-NEXT: mov z0.d, z0.d[7]
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <8 x double>, <8 x double>* %a
|
%op1 = load <8 x double>, <8 x double>* %a
|
||||||
%r = extractelement <8 x double> %op1, i64 7
|
%r = extractelement <8 x double> %op1, i64 7
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @extractelement_v16f64(<16 x double>* %a) #0 {
|
define double @extractelement_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v16f64:
|
; CHECK-LABEL: extractelement_v16f64:
|
||||||
; VBITS_GE_1024: ptrue p0.d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w8, #15
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #15
|
||||||
; VBITS_GE_1024-NEXT: whilels p0.d, xzr, x8
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: lastb d0, p0, z0.d
|
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: lastb d0, p0, z0.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%r = extractelement <16 x double> %op1, i64 15
|
%r = extractelement <16 x double> %op1, i64 15
|
||||||
ret double %r
|
ret double %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @extractelement_v32f64(<32 x double>* %a) #0 {
|
define double @extractelement_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: extractelement_v32f64:
|
; CHECK-LABEL: extractelement_v32f64:
|
||||||
; VBITS_GE_2048: ptrue p0.d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w8, #31
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: mov w8, #31
|
||||||
; VBITS_GE_2048-NEXT: whilels p0.d, xzr, x8
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: lastb d0, p0, z0.d
|
; CHECK-NEXT: whilels p0.d, xzr, x8
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: lastb d0, p0, z0.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%r = extractelement <32 x double> %op1, i64 31
|
%r = extractelement <32 x double> %op1, i64 31
|
||||||
ret double %r
|
ret double %r
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep 'z[0-9]'
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -24,7 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
|
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v4f16:
|
; CHECK-LABEL: fcmp_oeq_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h
|
; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h
|
||||||
|
@ -35,7 +21,7 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
|
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v8f16:
|
; CHECK-LABEL: fcmp_oeq_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq v0.8h, v0.8h, v1.8h
|
; CHECK-NEXT: fcmeq v0.8h, v0.8h, v1.8h
|
||||||
|
@ -45,7 +31,7 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
|
||||||
ret <8 x i16> %sext
|
ret <8 x i16> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v16f16:
|
; CHECK-LABEL: fcmp_oeq_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -64,7 +50,6 @@ define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
|
define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
|
||||||
; Ensure sensible type legalisation
|
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
|
; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
|
@ -98,44 +83,16 @@ define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #0 {
|
define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v64f16:
|
; CHECK-LABEL: fcmp_oeq_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.h }, p0/z, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z2.h, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z1.h, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z3.h, z7.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x2, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcmp_oeq_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x2]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <64 x half>, <64 x half>* %a
|
%op1 = load <64 x half>, <64 x half>* %a
|
||||||
%op2 = load <64 x half>, <64 x half>* %b
|
%op2 = load <64 x half>, <64 x half>* %b
|
||||||
%cmp = fcmp oeq <64 x half> %op1, %op2
|
%cmp = fcmp oeq <64 x half> %op1, %op2
|
||||||
|
@ -144,68 +101,16 @@ define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) #0 {
|
define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v128f16:
|
; CHECK-LABEL: fcmp_oeq_v128f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #96
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #112
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #64
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #80
|
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: mov x12, #32
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: mov x13, #48
|
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.h }, p0/z, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z16.h }, p0/z, [x1, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z17.h }, p0/z, [x1, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z20.h }, p0/z, [x1, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z21.h }, p0/z, [x1, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z19.h }, p0/z, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z22.h }, p0/z, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z18.h }, p0/z, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z23.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z6.h, z17.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z5.h, z16.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z6.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z4.h, z21.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z3.h, z20.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z4.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z2.h, z22.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z1.h, z19.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z18.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z7.h, z23.h
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z7.h, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x2, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x2, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x2, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z6.h }, p0, [x2, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z5.h }, p0, [x2, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z7.h }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcmp_oeq_v128f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x2]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <128 x half>, <128 x half>* %a
|
%op1 = load <128 x half>, <128 x half>* %a
|
||||||
%op2 = load <128 x half>, <128 x half>* %b
|
%op2 = load <128 x half>, <128 x half>* %b
|
||||||
%cmp = fcmp oeq <128 x half> %op1, %op2
|
%cmp = fcmp oeq <128 x half> %op1, %op2
|
||||||
|
@ -215,7 +120,7 @@ define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
|
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v2f32:
|
; CHECK-LABEL: fcmp_oeq_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
|
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
|
||||||
|
@ -226,7 +131,7 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
|
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v4f32:
|
; CHECK-LABEL: fcmp_oeq_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
|
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
|
||||||
|
@ -236,7 +141,7 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
|
||||||
ret <4 x i32> %sext
|
ret <4 x i32> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0 {
|
define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v8f32:
|
; CHECK-LABEL: fcmp_oeq_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -255,7 +160,6 @@ define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
|
define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
|
||||||
; Ensure sensible type legalisation
|
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
|
; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
|
@ -289,44 +193,16 @@ define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) #0 {
|
define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v32f32:
|
; CHECK-LABEL: fcmp_oeq_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z2.s, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z1.s, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z3.s, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x2, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcmp_oeq_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x2]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%op2 = load <32 x float>, <32 x float>* %b
|
%op2 = load <32 x float>, <32 x float>* %b
|
||||||
%cmp = fcmp oeq <32 x float> %op1, %op2
|
%cmp = fcmp oeq <32 x float> %op1, %op2
|
||||||
|
@ -335,68 +211,16 @@ define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) #0 {
|
define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v64f32:
|
; CHECK-LABEL: fcmp_oeq_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #56
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: mov x13, #24
|
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z16.s }, p0/z, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z17.s }, p0/z, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z20.s }, p0/z, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z21.s }, p0/z, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z19.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z22.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z18.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z23.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z6.s, z17.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z5.s, z16.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z6.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z4.s, z21.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z3.s, z20.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z2.s, z22.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z1.s, z19.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z18.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z7.s, z23.s
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z7.s, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x2, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x2, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x2, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x2, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x2, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcmp_oeq_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x2]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <64 x float>, <64 x float>* %a
|
%op1 = load <64 x float>, <64 x float>* %a
|
||||||
%op2 = load <64 x float>, <64 x float>* %b
|
%op2 = load <64 x float>, <64 x float>* %b
|
||||||
%cmp = fcmp oeq <64 x float> %op1, %op2
|
%cmp = fcmp oeq <64 x float> %op1, %op2
|
||||||
|
@ -406,7 +230,7 @@ define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c)
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
|
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v1f64:
|
; CHECK-LABEL: fcmp_oeq_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq d0, d0, d1
|
; CHECK-NEXT: fcmeq d0, d0, d1
|
||||||
|
@ -417,7 +241,7 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
|
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v2f64:
|
; CHECK-LABEL: fcmp_oeq_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d
|
; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d
|
||||||
|
@ -427,7 +251,7 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
|
||||||
ret <2 x i64> %sext
|
ret <2 x i64> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #0 {
|
define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oeq_v4f64:
|
; CHECK-LABEL: fcmp_oeq_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -446,7 +270,6 @@ define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
|
define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
|
||||||
; Ensure sensible type legalisation
|
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
|
; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
|
@ -480,44 +303,16 @@ define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) #0 {
|
define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v16f64:
|
; CHECK-LABEL: fcmp_oeq_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z2.d, z5.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, z4.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z6.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z3.d, z7.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x2, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcmp_oeq_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x2]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%op2 = load <16 x double>, <16 x double>* %b
|
%op2 = load <16 x double>, <16 x double>* %b
|
||||||
%cmp = fcmp oeq <16 x double> %op1, %op2
|
%cmp = fcmp oeq <16 x double> %op1, %op2
|
||||||
|
@ -526,68 +321,16 @@ define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) #0 {
|
define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcmp_oeq_v32f64:
|
; CHECK-LABEL: fcmp_oeq_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #28
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: mov x12, #8
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_256-NEXT: mov x13, #12
|
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z16.d }, p0/z, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z17.d }, p0/z, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z20.d }, p0/z, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z21.d }, p0/z, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z19.d }, p0/z, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z22.d }, p0/z, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z18.d }, p0/z, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z23.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z6.d, z17.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z5.d, z16.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z6.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z4.d, z21.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z3.d, z20.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z3.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z4.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z2.d, z22.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, z19.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z2.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z18.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z7.d, z23.d
|
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: mov z7.d, p2/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x2, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x2, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x2, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x2, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x2, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcmp_oeq_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x2]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%op2 = load <32 x double>, <32 x double>* %b
|
%op2 = load <32 x double>, <32 x double>* %b
|
||||||
%cmp = fcmp oeq <32 x double> %op1, %op2
|
%cmp = fcmp oeq <32 x double> %op1, %op2
|
||||||
|
@ -600,7 +343,7 @@ define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %
|
||||||
; FCMP UEQ
|
; FCMP UEQ
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ueq_v16f16:
|
; CHECK-LABEL: fcmp_ueq_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -624,7 +367,7 @@ define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP ONE
|
; FCMP ONE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_one_v16f16:
|
; CHECK-LABEL: fcmp_one_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -648,7 +391,7 @@ define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP UNE
|
; FCMP UNE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_une_v16f16:
|
; CHECK-LABEL: fcmp_une_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -670,7 +413,7 @@ define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP OGT
|
; FCMP OGT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ogt_v16f16:
|
; CHECK-LABEL: fcmp_ogt_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -692,7 +435,7 @@ define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP UGT
|
; FCMP UGT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ugt_v16f16:
|
; CHECK-LABEL: fcmp_ugt_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -716,7 +459,7 @@ define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP OLT
|
; FCMP OLT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_olt_v16f16:
|
; CHECK-LABEL: fcmp_olt_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -738,7 +481,7 @@ define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP ULT
|
; FCMP ULT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ult_v16f16:
|
; CHECK-LABEL: fcmp_ult_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -762,7 +505,7 @@ define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP OGE
|
; FCMP OGE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_oge_v16f16:
|
; CHECK-LABEL: fcmp_oge_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -784,7 +527,7 @@ define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP UGE
|
; FCMP UGE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_uge_v16f16:
|
; CHECK-LABEL: fcmp_uge_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -808,7 +551,7 @@ define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP OLE
|
; FCMP OLE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ole_v16f16:
|
; CHECK-LABEL: fcmp_ole_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -830,7 +573,7 @@ define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP ULE
|
; FCMP ULE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ule_v16f16:
|
; CHECK-LABEL: fcmp_ule_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -854,7 +597,7 @@ define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP UNO
|
; FCMP UNO
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_uno_v16f16:
|
; CHECK-LABEL: fcmp_uno_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -876,7 +619,7 @@ define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP ORD
|
; FCMP ORD
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ord_v16f16:
|
; CHECK-LABEL: fcmp_ord_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -900,7 +643,7 @@ define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
|
||||||
; FCMP EQ
|
; FCMP EQ
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_eq_v16f16:
|
; CHECK-LABEL: fcmp_eq_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -922,7 +665,7 @@ define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0
|
||||||
; FCMP NE
|
; FCMP NE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ne_v16f16:
|
; CHECK-LABEL: fcmp_ne_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -944,7 +687,7 @@ define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0
|
||||||
; FCMP GT
|
; FCMP GT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_gt_v16f16:
|
; CHECK-LABEL: fcmp_gt_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -966,7 +709,7 @@ define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0
|
||||||
; FCMP LT
|
; FCMP LT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_lt_v16f16:
|
; CHECK-LABEL: fcmp_lt_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -988,7 +731,7 @@ define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0
|
||||||
; FCMP GE
|
; FCMP GE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_ge_v16f16:
|
; CHECK-LABEL: fcmp_ge_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -1010,7 +753,7 @@ define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0
|
||||||
; FCMP LE
|
; FCMP LE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
|
define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcmp_le_v16f16:
|
; CHECK-LABEL: fcmp_le_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
|
@ -1,21 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -24,7 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v2f16_v2f32(<2 x half>* %a, <2 x float>* %b) #0 {
|
define void @fcvt_v2f16_v2f32(<2 x half>* %a, <2 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f16_v2f32:
|
; CHECK-LABEL: fcvt_v2f16_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr s0, [x0]
|
; CHECK-NEXT: ldr s0, [x0]
|
||||||
|
@ -38,7 +24,7 @@ define void @fcvt_v2f16_v2f32(<2 x half>* %a, <2 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @fcvt_v4f16_v4f32(<4 x half>* %a, <4 x float>* %b) #0 {
|
define void @fcvt_v4f16_v4f32(<4 x half>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f16_v4f32:
|
; CHECK-LABEL: fcvt_v4f16_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -51,7 +37,7 @@ define void @fcvt_v4f16_v4f32(<4 x half>* %a, <4 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v8f16_v8f32(<8 x half>* %a, <8 x float>* %b) #0 {
|
define void @fcvt_v8f16_v8f32(<8 x half>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v8f16_v8f32:
|
; CHECK-LABEL: fcvt_v8f16_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -66,7 +52,6 @@ define void @fcvt_v8f16_v8f32(<8 x half>* %a, <8 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f16_v16f32(<16 x half>* %a, <16 x float>* %b) #0 {
|
define void @fcvt_v16f16_v16f32(<16 x half>* %a, <16 x float>* %b) #0 {
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
; VBITS_GE_256-LABEL: fcvt_v16f16_v16f32:
|
; VBITS_GE_256-LABEL: fcvt_v16f16_v16f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
|
@ -86,91 +71,34 @@ define void @fcvt_v16f16_v16f32(<16 x half>* %a, <16 x float>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: fcvt z0.s, p0/m, z0.h
|
; VBITS_GE_512-NEXT: fcvt z0.s, p0/m, z0.h
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
%op1 = load <16 x half>, <16 x half>* %a
|
%op1 = load <16 x half>, <16 x half>* %a
|
||||||
%res = fpext <16 x half> %op1 to <16 x float>
|
%res = fpext <16 x half> %op1 to <16 x float>
|
||||||
store <16 x float> %res, <16 x float>* %b
|
store <16 x float> %res, <16 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f16_v32f32(<32 x half>* %a, <32 x float>* %b) #0 {
|
define void @fcvt_v32f16_v32f32(<32 x half>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f16_v32f32:
|
; CHECK-LABEL: fcvt_v32f16_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.s }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.s }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.s, p0/m, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.s, p0/m, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v32f16_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.s, p0/m, z0.h
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <32 x half>, <32 x half>* %a
|
%op1 = load <32 x half>, <32 x half>* %a
|
||||||
%res = fpext <32 x half> %op1 to <32 x float>
|
%res = fpext <32 x half> %op1 to <32 x float>
|
||||||
store <32 x float> %res, <32 x float>* %b
|
store <32 x float> %res, <32 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v64f16_v64f32(<64 x half>* %a, <64 x float>* %b) #0 {
|
define void @fcvt_v64f16_v64f32(<64 x half>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v64f16_v64f32:
|
; CHECK-LABEL: fcvt_v64f16_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #56
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #48
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.s }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.s }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.s }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.s }, p0/z, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.s }, p0/z, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.s }, p0/z, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.s, p0/m, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.s, p0/m, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z4.s, p0/m, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z5.s, p0/m, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z6.s, p0/m, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z7.s, p0/m, z7.h
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v64f16_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.s, p0/m, z0.h
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <64 x half>, <64 x half>* %a
|
%op1 = load <64 x half>, <64 x half>* %a
|
||||||
%res = fpext <64 x half> %op1 to <64 x float>
|
%res = fpext <64 x half> %op1 to <64 x float>
|
||||||
store <64 x float> %res, <64 x float>* %b
|
store <64 x float> %res, <64 x float>* %b
|
||||||
|
@ -182,7 +110,7 @@ define void @fcvt_v64f16_v64f32(<64 x half>* %a, <64 x float>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v1f16_v1f64(<1 x half>* %a, <1 x double>* %b) #0 {
|
define void @fcvt_v1f16_v1f64(<1 x half>* %a, <1 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v1f16_v1f64:
|
; CHECK-LABEL: fcvt_v1f16_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr h0, [x0]
|
; CHECK-NEXT: ldr h0, [x0]
|
||||||
|
@ -196,7 +124,7 @@ define void @fcvt_v1f16_v1f64(<1 x half>* %a, <1 x double>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; v2f16 is not legal for NEON, so use SVE
|
; v2f16 is not legal for NEON, so use SVE
|
||||||
define void @fcvt_v2f16_v2f64(<2 x half>* %a, <2 x double>* %b) #0 {
|
define void @fcvt_v2f16_v2f64(<2 x half>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f16_v2f64:
|
; CHECK-LABEL: fcvt_v2f16_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr s0, [x0]
|
; CHECK-NEXT: ldr s0, [x0]
|
||||||
|
@ -212,7 +140,7 @@ define void @fcvt_v2f16_v2f64(<2 x half>* %a, <2 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v4f16_v4f64(<4 x half>* %a, <4 x double>* %b) #0 {
|
define void @fcvt_v4f16_v4f64(<4 x half>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f16_v4f64:
|
; CHECK-LABEL: fcvt_v4f16_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -246,91 +174,34 @@ define void @fcvt_v8f16_v8f64(<8 x half>* %a, <8 x double>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: fcvt z0.d, p0/m, z0.h
|
; VBITS_GE_512-NEXT: fcvt z0.d, p0/m, z0.h
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
%op1 = load <8 x half>, <8 x half>* %a
|
%op1 = load <8 x half>, <8 x half>* %a
|
||||||
%res = fpext <8 x half> %op1 to <8 x double>
|
%res = fpext <8 x half> %op1 to <8 x double>
|
||||||
store <8 x double> %res, <8 x double>* %b
|
store <8 x double> %res, <8 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f16_v16f64(<16 x half>* %a, <16 x double>* %b) #0 {
|
define void @fcvt_v16f16_v16f64(<16 x half>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v16f16_v16f64:
|
; CHECK-LABEL: fcvt_v16f16_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.d }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.d, p0/m, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.d, p0/m, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.d, p0/m, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v16f16_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.d, p0/m, z0.h
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x half>, <16 x half>* %a
|
%op1 = load <16 x half>, <16 x half>* %a
|
||||||
%res = fpext <16 x half> %op1 to <16 x double>
|
%res = fpext <16 x half> %op1 to <16 x double>
|
||||||
store <16 x double> %res, <16 x double>* %b
|
store <16 x double> %res, <16 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f16_v32f64(<32 x half>* %a, <32 x double>* %b) #0 {
|
define void @fcvt_v32f16_v32f64(<32 x half>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f16_v32f64:
|
; CHECK-LABEL: fcvt_v32f16_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #28
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #24
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.d }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.d }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.d }, p0/z, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.d }, p0/z, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.d }, p0/z, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.d, p0/m, z1.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.d, p0/m, z2.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.d, p0/m, z3.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z4.d, p0/m, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z5.d, p0/m, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z6.d, p0/m, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z7.d, p0/m, z7.h
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v32f16_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.d, p0/m, z0.h
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x half>, <32 x half>* %a
|
%op1 = load <32 x half>, <32 x half>* %a
|
||||||
%res = fpext <32 x half> %op1 to <32 x double>
|
%res = fpext <32 x half> %op1 to <32 x double>
|
||||||
store <32 x double> %res, <32 x double>* %b
|
store <32 x double> %res, <32 x double>* %b
|
||||||
|
@ -342,7 +213,7 @@ define void @fcvt_v32f16_v32f64(<32 x half>* %a, <32 x double>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v1f32_v1f64(<1 x float>* %a, <1 x double>* %b) #0 {
|
define void @fcvt_v1f32_v1f64(<1 x float>* %a, <1 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v1f32_v1f64:
|
; CHECK-LABEL: fcvt_v1f32_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr s0, [x0]
|
; CHECK-NEXT: ldr s0, [x0]
|
||||||
|
@ -356,7 +227,7 @@ define void @fcvt_v1f32_v1f64(<1 x float>* %a, <1 x double>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @fcvt_v2f32_v2f64(<2 x float>* %a, <2 x double>* %b) #0 {
|
define void @fcvt_v2f32_v2f64(<2 x float>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f32_v2f64:
|
; CHECK-LABEL: fcvt_v2f32_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -369,7 +240,7 @@ define void @fcvt_v2f32_v2f64(<2 x float>* %a, <2 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v4f32_v4f64(<4 x float>* %a, <4 x double>* %b) #0 {
|
define void @fcvt_v4f32_v4f64(<4 x float>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f32_v4f64:
|
; CHECK-LABEL: fcvt_v4f32_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -384,7 +255,6 @@ define void @fcvt_v4f32_v4f64(<4 x float>* %a, <4 x double>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v8f32_v8f64(<8 x float>* %a, <8 x double>* %b) #0 {
|
define void @fcvt_v8f32_v8f64(<8 x float>* %a, <8 x double>* %b) #0 {
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
; VBITS_GE_256-LABEL: fcvt_v8f32_v8f64:
|
; VBITS_GE_256-LABEL: fcvt_v8f32_v8f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
|
@ -410,84 +280,28 @@ define void @fcvt_v8f32_v8f64(<8 x float>* %a, <8 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f32_v16f64(<16 x float>* %a, <16 x double>* %b) #0 {
|
define void @fcvt_v16f32_v16f64(<16 x float>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v16f32_v16f64:
|
; CHECK-LABEL: fcvt_v16f32_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.d }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.d }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.d, p0/m, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.d, p0/m, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.d, p0/m, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v16f32_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.d, p0/m, z0.s
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x float>, <16 x float>* %a
|
%op1 = load <16 x float>, <16 x float>* %a
|
||||||
%res = fpext <16 x float> %op1 to <16 x double>
|
%res = fpext <16 x float> %op1 to <16 x double>
|
||||||
store <16 x double> %res, <16 x double>* %b
|
store <16 x double> %res, <16 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f32_v32f64(<32 x float>* %a, <32 x double>* %b) #0 {
|
define void @fcvt_v32f32_v32f64(<32 x float>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f32_v32f64:
|
; CHECK-LABEL: fcvt_v32f32_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #28
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #24
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.d }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.d }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.d }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.d }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.d }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.d }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.d, p0/m, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.d, p0/m, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.d, p0/m, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z4.d, p0/m, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z5.d, p0/m, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z6.d, p0/m, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z7.d, p0/m, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v32f32_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.d, p0/m, z0.s
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%res = fpext <32 x float> %op1 to <32 x double>
|
%res = fpext <32 x float> %op1 to <32 x double>
|
||||||
store <32 x double> %res, <32 x double>* %b
|
store <32 x double> %res, <32 x double>* %b
|
||||||
|
@ -499,7 +313,7 @@ define void @fcvt_v32f32_v32f64(<32 x float>* %a, <32 x double>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v2f32_v2f16(<2 x float>* %a, <2 x half>* %b) #0 {
|
define void @fcvt_v2f32_v2f16(<2 x float>* %a, <2 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f32_v2f16:
|
; CHECK-LABEL: fcvt_v2f32_v2f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -513,7 +327,7 @@ define void @fcvt_v2f32_v2f16(<2 x float>* %a, <2 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @fcvt_v4f32_v4f16(<4 x float>* %a, <4 x half>* %b) #0 {
|
define void @fcvt_v4f32_v4f16(<4 x float>* %a, <4 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f32_v4f16:
|
; CHECK-LABEL: fcvt_v4f32_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x0]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
@ -526,7 +340,7 @@ define void @fcvt_v4f32_v4f16(<4 x float>* %a, <4 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v8f32_v8f16(<8 x float>* %a, <8 x half>* %b) #0 {
|
define void @fcvt_v8f32_v8f16(<8 x float>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v8f32_v8f16:
|
; CHECK-LABEL: fcvt_v8f32_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -541,7 +355,18 @@ define void @fcvt_v8f32_v8f16(<8 x float>* %a, <8 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f32_v16f16(<16 x float>* %a, <16 x half>* %b) #0 {
|
define void @fcvt_v16f32_v16f16(<16 x float>* %a, <16 x half>* %b) #0 {
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-LABEL: fcvt_v16f32_v16f16:
|
||||||
|
; VBITS_GE_256: // %bb.0:
|
||||||
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z1.s
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
; VBITS_GE_512-LABEL: fcvt_v16f32_v16f16:
|
; VBITS_GE_512-LABEL: fcvt_v16f32_v16f16:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
@ -555,90 +380,28 @@ define void @fcvt_v16f32_v16f16(<16 x float>* %a, <16 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f32_v32f16(<32 x float>* %a, <32 x half>* %b) #0 {
|
define void @fcvt_v32f32_v32f16(<32 x float>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f32_v32f16:
|
; CHECK-LABEL: fcvt_v32f32_v32f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.h, p0/m, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.h, p0/m, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.s }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v32f32_v32f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.h, p0/m, z0.s
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%res = fptrunc <32 x float> %op1 to <32 x half>
|
%res = fptrunc <32 x float> %op1 to <32 x half>
|
||||||
store <32 x half> %res, <32 x half>* %b
|
store <32 x half> %res, <32 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v64f32_v64f16(<64 x float>* %a, <64 x half>* %b) #0 {
|
define void @fcvt_v64f32_v64f16(<64 x float>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v64f32_v64f16:
|
; CHECK-LABEL: fcvt_v64f32_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #56
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x10, #48
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #24
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #40
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #32
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z1.s
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.s
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z5
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z4
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z6
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z3
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z3.s
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z2
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z2.s
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z7
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v64f32_v64f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.h, p0/m, z0.s
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <64 x float>, <64 x float>* %a
|
%op1 = load <64 x float>, <64 x float>* %a
|
||||||
%res = fptrunc <64 x float> %op1 to <64 x half>
|
%res = fptrunc <64 x float> %op1 to <64 x half>
|
||||||
store <64 x half> %res, <64 x half>* %b
|
store <64 x half> %res, <64 x half>* %b
|
||||||
|
@ -650,7 +413,7 @@ define void @fcvt_v64f32_v64f16(<64 x float>* %a, <64 x half>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v1f64_v1f16(<1 x double>* %a, <1 x half>* %b) #0 {
|
define void @fcvt_v1f64_v1f16(<1 x double>* %a, <1 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v1f64_v1f16:
|
; CHECK-LABEL: fcvt_v1f64_v1f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -664,7 +427,7 @@ define void @fcvt_v1f64_v1f16(<1 x double>* %a, <1 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; v2f16 is not legal for NEON, so use SVE
|
; v2f16 is not legal for NEON, so use SVE
|
||||||
define void @fcvt_v2f64_v2f16(<2 x double>* %a, <2 x half>* %b) #0 {
|
define void @fcvt_v2f64_v2f16(<2 x double>* %a, <2 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f64_v2f16:
|
; CHECK-LABEL: fcvt_v2f64_v2f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x0]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
@ -680,7 +443,7 @@ define void @fcvt_v2f64_v2f16(<2 x double>* %a, <2 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v4f64_v4f16(<4 x double>* %a, <4 x half>* %b) #0 {
|
define void @fcvt_v4f64_v4f16(<4 x double>* %a, <4 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f64_v4f16:
|
; CHECK-LABEL: fcvt_v4f64_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -695,7 +458,6 @@ define void @fcvt_v4f64_v4f16(<4 x double>* %a, <4 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v8f64_v8f16(<8 x double>* %a, <8 x half>* %b) #0 {
|
define void @fcvt_v8f64_v8f16(<8 x double>* %a, <8 x half>* %b) #0 {
|
||||||
; Ensure sensible type legalisation
|
|
||||||
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f16:
|
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
|
@ -726,70 +488,28 @@ define void @fcvt_v8f64_v8f16(<8 x double>* %a, <8 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f64_v16f16(<16 x double>* %a, <16 x half>* %b) #0 {
|
define void @fcvt_v16f64_v16f16(<16 x double>* %a, <16 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v16f64_v16f16:
|
; CHECK-LABEL: fcvt_v16f64_v16f16:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.h, p0/m, z0.d
|
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [x1]
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%res = fptrunc <16 x double> %op1 to <16 x half>
|
%res = fptrunc <16 x double> %op1 to <16 x half>
|
||||||
store <16 x half> %res, <16 x half>* %b
|
store <16 x half> %res, <16 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f64_v32f16(<32 x double>* %a, <32 x half>* %b) #0 {
|
define void @fcvt_v32f64_v32f16(<32 x double>* %a, <32 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f64_v32f16:
|
; CHECK-LABEL: fcvt_v32f64_v32f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #28
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #12
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #8
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #20
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #16
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z1.d
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.d
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z5
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z5.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z4
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z4.d
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [x1, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [x1, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z6
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z6.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z3
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z3.d
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [x1, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [x1, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z2
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z2.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z7
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z7.d
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v32f64_v32f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.h, p0/m, z0.d
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%res = fptrunc <32 x double> %op1 to <32 x half>
|
%res = fptrunc <32 x double> %op1 to <32 x half>
|
||||||
store <32 x half> %res, <32 x half>* %b
|
store <32 x half> %res, <32 x half>* %b
|
||||||
|
@ -801,7 +521,7 @@ define void @fcvt_v32f64_v32f16(<32 x double>* %a, <32 x half>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @fcvt_v1f64_v1f32(<1 x double> %op1, <1 x float>* %b) #0 {
|
define void @fcvt_v1f64_v1f32(<1 x double> %op1, <1 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v1f64_v1f32:
|
; CHECK-LABEL: fcvt_v1f64_v1f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -814,7 +534,7 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, <1 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @fcvt_v2f64_v2f32(<2 x double> %op1, <2 x float>* %b) #0 {
|
define void @fcvt_v2f64_v2f32(<2 x double> %op1, <2 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v2f64_v2f32:
|
; CHECK-LABEL: fcvt_v2f64_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fcvtn v0.2s, v0.2d
|
; CHECK-NEXT: fcvtn v0.2s, v0.2d
|
||||||
|
@ -825,7 +545,7 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, <2 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v4f64_v4f32(<4 x double>* %a, <4 x float>* %b) #0 {
|
define void @fcvt_v4f64_v4f32(<4 x double>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fcvt_v4f64_v4f32:
|
; CHECK-LABEL: fcvt_v4f64_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -840,7 +560,18 @@ define void @fcvt_v4f64_v4f32(<4 x double>* %a, <4 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v8f64_v8f32(<8 x double>* %a, <8 x float>* %b) #0 {
|
define void @fcvt_v8f64_v8f32(<8 x double>* %a, <8 x float>* %b) #0 {
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f32:
|
||||||
|
; VBITS_GE_256: // %bb.0:
|
||||||
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.d
|
||||||
|
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z1.d
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
; VBITS_GE_512-LABEL: fcvt_v8f64_v8f32:
|
; VBITS_GE_512-LABEL: fcvt_v8f64_v8f32:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
@ -854,90 +585,28 @@ define void @fcvt_v8f64_v8f32(<8 x double>* %a, <8 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v16f64_v16f32(<16 x double>* %a, <16 x float>* %b) #0 {
|
define void @fcvt_v16f64_v16f32(<16 x double>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v16f64_v16f32:
|
; CHECK-LABEL: fcvt_v16f64_v16f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z1.d
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.d
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z2.s, p0/m, z2.d
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z3.s, p0/m, z3.d
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: fcvt_v16f64_v16f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: fcvt z0.s, p0/m, z0.d
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%res = fptrunc <16 x double> %op1 to <16 x float>
|
%res = fptrunc <16 x double> %op1 to <16 x float>
|
||||||
store <16 x float> %res, <16 x float>* %b
|
store <16 x float> %res, <16 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fcvt_v32f64_v32f32(<32 x double>* %a, <32 x float>* %b) #0 {
|
define void @fcvt_v32f64_v32f32(<32 x double>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: fcvt_v32f64_v32f32:
|
; CHECK-LABEL: fcvt_v32f64_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x9, #28
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x10, #24
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #12
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #8
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #20
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #16
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z1.d
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.d
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z5
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z5.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z4
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z4.d
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z6
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z6.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z3
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z3.d
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z0, z2
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z2.d
|
|
||||||
; VBITS_GE_256-NEXT: movprfx z1, z7
|
|
||||||
; VBITS_GE_256-NEXT: fcvt z1.s, p0/m, z7.d
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: fcvt_v32f64_v32f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: fcvt z0.s, p0/m, z0.d
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%res = fptrunc <32 x double> %op1 to <32 x float>
|
%res = fptrunc <32 x double> %op1 to <32 x float>
|
||||||
store <32 x float> %res, <32 x float>* %b
|
store <32 x float> %res, <32 x float>* %b
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -O3 -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s
|
; RUN: llc -O3 -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
|
; RUN: llc -O3 -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
|
; RUN: llc -O3 -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -8,7 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) #0 {
|
define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v4f16:
|
; CHECK-LABEL: fma_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmla v2.4h, v0.4h, v1.4h
|
; CHECK-NEXT: fmla v2.4h, v0.4h, v1.4h
|
||||||
|
@ -20,7 +22,7 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) #0 {
|
define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v8f16:
|
; CHECK-LABEL: fma_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmla v2.8h, v0.8h, v1.8h
|
; CHECK-NEXT: fmla v2.8h, v0.8h, v1.8h
|
||||||
|
@ -31,7 +33,7 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
|
||||||
ret <8 x half> %res
|
ret <8 x half> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) #0 {
|
define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v16f16:
|
; CHECK-LABEL: fma_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -51,15 +53,31 @@ define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 {
|
define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 {
|
||||||
; CHECK-LABEL: fma_v32f16:
|
; VBITS_GE_256-LABEL: fma_v32f16:
|
||||||
; CHECK: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl32
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x2]
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
|
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
|
||||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
|
||||||
; CHECK-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x2, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x2]
|
||||||
|
; VBITS_GE_256-NEXT: fmad z0.h, p0/m, z2.h, z4.h
|
||||||
|
; VBITS_GE_256-NEXT: fmad z1.h, p0/m, z3.h, z5.h
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: fma_v32f16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x2]
|
||||||
|
; VBITS_GE_512-NEXT: fmad z0.h, p0/m, z1.h, z2.h
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <32 x half>, <32 x half>* %a
|
%op1 = load <32 x half>, <32 x half>* %a
|
||||||
%op2 = load <32 x half>, <32 x half>* %b
|
%op2 = load <32 x half>, <32 x half>* %b
|
||||||
%op3 = load <32 x half>, <32 x half>* %c
|
%op3 = load <32 x half>, <32 x half>* %c
|
||||||
|
@ -69,7 +87,7 @@ define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) #0 {
|
define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: fma_v64f16:
|
; CHECK-LABEL: fma_v64f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl64
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
|
@ -88,7 +106,7 @@ define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) #0 {
|
define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: fma_v128f16:
|
; CHECK-LABEL: fma_v128f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl128
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
|
@ -108,7 +126,7 @@ define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) #
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) #0 {
|
define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v2f32:
|
; CHECK-LABEL: fma_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s
|
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s
|
||||||
|
@ -120,7 +138,7 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) #0 {
|
define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v4f32:
|
; CHECK-LABEL: fma_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmla v2.4s, v0.4s, v1.4s
|
; CHECK-NEXT: fmla v2.4s, v0.4s, v1.4s
|
||||||
|
@ -131,7 +149,7 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) #0 {
|
define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v8f32:
|
; CHECK-LABEL: fma_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -151,15 +169,31 @@ define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0 {
|
define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0 {
|
||||||
; CHECK-LABEL: fma_v16f32:
|
; VBITS_GE_256-LABEL: fma_v16f32:
|
||||||
; CHECK: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl16
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2]
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
|
||||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
|
||||||
; CHECK-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x2, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x2]
|
||||||
|
; VBITS_GE_256-NEXT: fmad z0.s, p0/m, z2.s, z4.s
|
||||||
|
; VBITS_GE_256-NEXT: fmad z1.s, p0/m, z3.s, z5.s
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: fma_v16f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x2]
|
||||||
|
; VBITS_GE_512-NEXT: fmad z0.s, p0/m, z1.s, z2.s
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <16 x float>, <16 x float>* %a
|
%op1 = load <16 x float>, <16 x float>* %a
|
||||||
%op2 = load <16 x float>, <16 x float>* %b
|
%op2 = load <16 x float>, <16 x float>* %b
|
||||||
%op3 = load <16 x float>, <16 x float>* %c
|
%op3 = load <16 x float>, <16 x float>* %c
|
||||||
|
@ -169,7 +203,7 @@ define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) #0 {
|
define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: fma_v32f32:
|
; CHECK-LABEL: fma_v32f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl32
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
|
@ -188,7 +222,7 @@ define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) #0
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) #0 {
|
define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: fma_v64f32:
|
; CHECK-LABEL: fma_v64f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl64
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
|
@ -208,7 +242,7 @@ define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) #0
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) #0 {
|
define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v1f64:
|
; CHECK-LABEL: fma_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmadd d0, d0, d1, d2
|
; CHECK-NEXT: fmadd d0, d0, d1, d2
|
||||||
|
@ -219,7 +253,7 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) #0 {
|
define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v2f64:
|
; CHECK-LABEL: fma_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmla v2.2d, v0.2d, v1.2d
|
; CHECK-NEXT: fmla v2.2d, v0.2d, v1.2d
|
||||||
|
@ -230,7 +264,7 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) #0 {
|
define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: fma_v4f64:
|
; CHECK-LABEL: fma_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -250,15 +284,31 @@ define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) #0
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0 {
|
define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0 {
|
||||||
; CHECK-LABEL: fma_v8f64:
|
; VBITS_GE_256-LABEL: fma_v8f64:
|
||||||
; CHECK: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl8
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
|
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
|
||||||
; CHECK-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x2, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x2]
|
||||||
|
; VBITS_GE_256-NEXT: fmad z0.d, p0/m, z2.d, z4.d
|
||||||
|
; VBITS_GE_256-NEXT: fmad z1.d, p0/m, z3.d, z5.d
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: fma_v8f64:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x2]
|
||||||
|
; VBITS_GE_512-NEXT: fmad z0.d, p0/m, z1.d, z2.d
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <8 x double>, <8 x double>* %a
|
%op1 = load <8 x double>, <8 x double>* %a
|
||||||
%op2 = load <8 x double>, <8 x double>* %b
|
%op2 = load <8 x double>, <8 x double>* %b
|
||||||
%op3 = load <8 x double>, <8 x double>* %c
|
%op3 = load <8 x double>, <8 x double>* %c
|
||||||
|
@ -268,7 +318,7 @@ define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c) #0 {
|
define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: fma_v16f64:
|
; CHECK-LABEL: fma_v16f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
|
@ -287,7 +337,7 @@ define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @fma_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x double>* %c) #0 {
|
define void @fma_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x double>* %c) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: fma_v32f64:
|
; CHECK-LABEL: fma_v32f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,36 +1,12 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 {
|
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v4f16:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: dup v2.4h, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.8b, v1.8b, v2.8b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v4f16:
|
; CHECK-LABEL: select_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -43,15 +19,7 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 {
|
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v8f16:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: dup v2.8h, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v1.16b, v2.16b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v8f16:
|
; CHECK-LABEL: select_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -63,21 +31,7 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 {
|
||||||
ret <8 x half> %sel
|
ret <8 x half> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, i1 %mask) #0 {
|
define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v16f16:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
|
||||||
; NO_SVE-NEXT: ldr q0, [x0]
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q2, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: dup v4.8h, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v2.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v3.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q1, [x0]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v16f16:
|
; CHECK-LABEL: select_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: and w8, w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
|
@ -99,26 +53,24 @@ define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, i1 %mask) #0 {
|
define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, i1 %mask) #0 {
|
||||||
; NO_SVE-LABEL: select_v32f16:
|
; VBITS_GE_256-LABEL: select_v32f16:
|
||||||
; NO_SVE: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #48]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; VBITS_GE_256-NEXT: ptrue p1.h
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #16]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q4, [x1, #48]
|
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
|
||||||
; NO_SVE-NEXT: dup v6.8h, w8
|
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q5, [x1]
|
; VBITS_GE_256-NEXT: mov z4.h, w9
|
||||||
; NO_SVE-NEXT: ldr q7, [x1, #16]
|
; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #32]
|
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
|
||||||
; NO_SVE-NEXT: bif v1.16b, v5.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
|
||||||
; NO_SVE-NEXT: bif v2.16b, v7.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
|
||||||
; NO_SVE-NEXT: bif v0.16b, v4.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
; NO_SVE-NEXT: bif v3.16b, v16.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
; NO_SVE-NEXT: stp q1, q2, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; NO_SVE-NEXT: stp q3, q0, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: select_v32f16:
|
; VBITS_GE_512-LABEL: select_v32f16:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -140,58 +92,20 @@ define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, i1 %mask) #0 {
|
define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v64f16:
|
; CHECK-LABEL: select_v64f16:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #16]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #48]
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; CHECK-NEXT: mov z2.h, w8
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #80]
|
; CHECK-NEXT: and z2.h, z2.h, #0x1
|
||||||
; NO_SVE-NEXT: dup v21.8h, w8
|
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #64]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #112]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #96]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v16.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v17.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v18.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v19.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v20.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v16.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v17.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v18.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.h
|
|
||||||
; VBITS_GE_1024-NEXT: mov z2.h, w8
|
|
||||||
; VBITS_GE_1024-NEXT: and z2.h, z2.h, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.h, p1, z0.h, z1.h
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load volatile <64 x half>, <64 x half>* %a
|
%op1 = load volatile <64 x half>, <64 x half>* %a
|
||||||
%op2 = load volatile <64 x half>, <64 x half>* %b
|
%op2 = load volatile <64 x half>, <64 x half>* %b
|
||||||
%sel = select i1 %mask, <64 x half> %op1, <64 x half> %op2
|
%sel = select i1 %mask, <64 x half> %op1, <64 x half> %op2
|
||||||
|
@ -199,103 +113,20 @@ define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) #0 {
|
define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v128f16:
|
; CHECK-LABEL: select_v128f16:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: .cfi_def_cfa_offset 32
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; NO_SVE-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: .cfi_offset b8, -8
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: .cfi_offset b9, -16
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; NO_SVE-NEXT: .cfi_offset b10, -24
|
; CHECK-NEXT: mov z2.h, w8
|
||||||
; NO_SVE-NEXT: .cfi_offset b11, -32
|
; CHECK-NEXT: and z2.h, z2.h, #0x1
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #240]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #224]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #176]
|
|
||||||
; NO_SVE-NEXT: dup v8.8h, w8
|
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #144]
|
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x0, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x0, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x0, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q21, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q22, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q23, [x0]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #240]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #224]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #192]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v24.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #176]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v25.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q29, [x1, #160]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v26.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q30, [x1, #144]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v27.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q31, [x1, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q9, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q10, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v28.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q11, [x1]
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v30.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v31.16b
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v16.16b, v9.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v17.16b, v10.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v18.16b, v28.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v19.16b, v24.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q4, q3, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v20.16b, v25.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v21.16b, v26.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v22.16b, v27.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v23.16b, v11.16b
|
|
||||||
; NO_SVE-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0]
|
|
||||||
; NO_SVE-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v128f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.h
|
|
||||||
; VBITS_GE_2048-NEXT: mov z2.h, w8
|
|
||||||
; VBITS_GE_2048-NEXT: and z2.h, z2.h, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.h, p1, z0.h, z1.h
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load volatile <128 x half>, <128 x half>* %a
|
%op1 = load volatile <128 x half>, <128 x half>* %a
|
||||||
%op2 = load volatile <128 x half>, <128 x half>* %b
|
%op2 = load volatile <128 x half>, <128 x half>* %b
|
||||||
%sel = select i1 %mask, <128 x half> %op1, <128 x half> %op2
|
%sel = select i1 %mask, <128 x half> %op1, <128 x half> %op2
|
||||||
|
@ -304,15 +135,7 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #0 {
|
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v2f32:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: dup v2.2s, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.8b, v1.8b, v2.8b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v2f32:
|
; CHECK-LABEL: select_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -325,15 +148,7 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #0 {
|
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v4f32:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: dup v2.4s, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v1.16b, v2.16b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v4f32:
|
; CHECK-LABEL: select_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -345,21 +160,7 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #
|
||||||
ret <4 x float> %sel
|
ret <4 x float> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) #0 {
|
define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v8f32:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
|
||||||
; NO_SVE-NEXT: ldr q0, [x0]
|
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q2, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: dup v4.4s, w8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v2.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v3.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q1, [x0]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v8f32:
|
; CHECK-LABEL: select_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: and w8, w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
|
@ -381,26 +182,24 @@ define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
|
define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
|
||||||
; NO_SVE-LABEL: select_v16f32:
|
; VBITS_GE_256-LABEL: select_v16f32:
|
||||||
; NO_SVE: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #48]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; VBITS_GE_256-NEXT: ptrue p1.s
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #16]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q4, [x1, #48]
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
|
||||||
; NO_SVE-NEXT: dup v6.4s, w8
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q5, [x1]
|
; VBITS_GE_256-NEXT: mov z4.s, w9
|
||||||
; NO_SVE-NEXT: ldr q7, [x1, #16]
|
; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #32]
|
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
|
||||||
; NO_SVE-NEXT: bif v1.16b, v5.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
|
||||||
; NO_SVE-NEXT: bif v2.16b, v7.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
|
||||||
; NO_SVE-NEXT: bif v0.16b, v4.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; NO_SVE-NEXT: bif v3.16b, v16.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
; NO_SVE-NEXT: stp q1, q2, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; NO_SVE-NEXT: stp q3, q0, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: select_v16f32:
|
; VBITS_GE_512-LABEL: select_v16f32:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -422,58 +221,20 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) #0 {
|
define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v32f32:
|
; CHECK-LABEL: select_v32f32:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #16]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #48]
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; CHECK-NEXT: mov z2.s, w8
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #80]
|
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||||
; NO_SVE-NEXT: dup v21.4s, w8
|
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #64]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #112]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #96]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v16.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v17.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v18.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v19.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v20.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v16.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v17.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v18.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.s
|
|
||||||
; VBITS_GE_1024-NEXT: mov z2.s, w8
|
|
||||||
; VBITS_GE_1024-NEXT: and z2.s, z2.s, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.s, p1, z0.s, z1.s
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load volatile <32 x float>, <32 x float>* %a
|
%op1 = load volatile <32 x float>, <32 x float>* %a
|
||||||
%op2 = load volatile <32 x float>, <32 x float>* %b
|
%op2 = load volatile <32 x float>, <32 x float>* %b
|
||||||
%sel = select i1 %mask, <32 x float> %op1, <32 x float> %op2
|
%sel = select i1 %mask, <32 x float> %op1, <32 x float> %op2
|
||||||
|
@ -481,103 +242,20 @@ define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) #0 {
|
define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v64f32:
|
; CHECK-LABEL: select_v64f32:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: .cfi_def_cfa_offset 32
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; NO_SVE-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: .cfi_offset b8, -8
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: .cfi_offset b9, -16
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; NO_SVE-NEXT: .cfi_offset b10, -24
|
; CHECK-NEXT: mov z2.s, w8
|
||||||
; NO_SVE-NEXT: .cfi_offset b11, -32
|
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #240]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; NO_SVE-NEXT: csetm w8, ne
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #224]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #176]
|
|
||||||
; NO_SVE-NEXT: dup v8.4s, w8
|
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #144]
|
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x0, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x0, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x0, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q21, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q22, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q23, [x0]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #240]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #224]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #192]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v24.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #176]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v25.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q29, [x1, #160]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v26.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q30, [x1, #144]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v27.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q31, [x1, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q9, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q10, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v28.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q11, [x1]
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v30.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v31.16b
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v16.16b, v9.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v17.16b, v10.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v18.16b, v28.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v19.16b, v24.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q4, q3, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v20.16b, v25.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v21.16b, v26.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v22.16b, v27.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v23.16b, v11.16b
|
|
||||||
; NO_SVE-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0]
|
|
||||||
; NO_SVE-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.s
|
|
||||||
; VBITS_GE_2048-NEXT: mov z2.s, w8
|
|
||||||
; VBITS_GE_2048-NEXT: and z2.s, z2.s, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.s, p1, z0.s, z1.s
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load volatile <64 x float>, <64 x float>* %a
|
%op1 = load volatile <64 x float>, <64 x float>* %a
|
||||||
%op2 = load volatile <64 x float>, <64 x float>* %b
|
%op2 = load volatile <64 x float>, <64 x float>* %b
|
||||||
%sel = select i1 %mask, <64 x float> %op1, <64 x float> %op2
|
%sel = select i1 %mask, <64 x float> %op1, <64 x float> %op2
|
||||||
|
@ -586,15 +264,7 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask) #0 {
|
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v1f64:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
|
||||||
; NO_SVE-NEXT: fmov d2, x8
|
|
||||||
; NO_SVE-NEXT: bif v0.8b, v1.8b, v2.8b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v1f64:
|
; CHECK-LABEL: select_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -607,15 +277,7 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) #0 {
|
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v2f64:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w0, #0x1
|
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
|
||||||
; NO_SVE-NEXT: dup v2.2d, x8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v1.16b, v2.16b
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v2f64:
|
; CHECK-LABEL: select_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -627,21 +289,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
|
||||||
ret <2 x double> %sel
|
ret <2 x double> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) #0 {
|
define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v4f64:
|
|
||||||
; NO_SVE: // %bb.0:
|
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
|
||||||
; NO_SVE-NEXT: ldr q0, [x0]
|
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q2, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: dup v4.2d, x8
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v2.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v3.16b, v4.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q1, [x0]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: select_v4f64:
|
; CHECK-LABEL: select_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: and w8, w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
|
@ -663,26 +311,24 @@ define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
|
define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
|
||||||
; NO_SVE-LABEL: select_v8f64:
|
; VBITS_GE_256-LABEL: select_v8f64:
|
||||||
; NO_SVE: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #48]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; VBITS_GE_256-NEXT: ptrue p1.d
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #16]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q4, [x1, #48]
|
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; NO_SVE-NEXT: dup v6.2d, x8
|
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q5, [x1]
|
; VBITS_GE_256-NEXT: mov z4.d, x9
|
||||||
; NO_SVE-NEXT: ldr q7, [x1, #16]
|
; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #32]
|
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
|
||||||
; NO_SVE-NEXT: bif v1.16b, v5.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
|
||||||
; NO_SVE-NEXT: bif v2.16b, v7.16b, v6.16b
|
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
|
||||||
; NO_SVE-NEXT: bif v0.16b, v4.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
; NO_SVE-NEXT: bif v3.16b, v16.16b, v6.16b
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
; NO_SVE-NEXT: stp q1, q2, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; NO_SVE-NEXT: stp q3, q0, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: select_v8f64:
|
; VBITS_GE_512-LABEL: select_v8f64:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -704,58 +350,20 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) #0 {
|
define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v16f64:
|
; CHECK-LABEL: select_v16f64:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #16]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0]
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #48]
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #32]
|
; CHECK-NEXT: mov z2.d, x8
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #80]
|
; CHECK-NEXT: and z2.d, z2.d, #0x1
|
||||||
; NO_SVE-NEXT: dup v21.2d, x8
|
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #64]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #112]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #96]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v16.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v17.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v18.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v19.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v20.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v21.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v16.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v17.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v18.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.d
|
|
||||||
; VBITS_GE_1024-NEXT: mov z2.d, x8
|
|
||||||
; VBITS_GE_1024-NEXT: and z2.d, z2.d, #0x1
|
|
||||||
; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.d, p1, z0.d, z1.d
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load volatile <16 x double>, <16 x double>* %a
|
%op1 = load volatile <16 x double>, <16 x double>* %a
|
||||||
%op2 = load volatile <16 x double>, <16 x double>* %b
|
%op2 = load volatile <16 x double>, <16 x double>* %b
|
||||||
%sel = select i1 %mask, <16 x double> %op1, <16 x double> %op2
|
%sel = select i1 %mask, <16 x double> %op1, <16 x double> %op2
|
||||||
|
@ -763,103 +371,20 @@ define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) #0 {
|
define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; NO_SVE-LABEL: select_v32f64:
|
; CHECK-LABEL: select_v32f64:
|
||||||
; NO_SVE: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; NO_SVE-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; NO_SVE-NEXT: .cfi_def_cfa_offset 32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; NO_SVE-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; NO_SVE-NEXT: .cfi_offset b8, -8
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; NO_SVE-NEXT: .cfi_offset b9, -16
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; NO_SVE-NEXT: .cfi_offset b10, -24
|
; CHECK-NEXT: mov z2.d, x8
|
||||||
; NO_SVE-NEXT: .cfi_offset b11, -32
|
; CHECK-NEXT: and z2.d, z2.d, #0x1
|
||||||
; NO_SVE-NEXT: tst w2, #0x1
|
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
; NO_SVE-NEXT: ldr q0, [x0, #240]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; NO_SVE-NEXT: csetm x8, ne
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; NO_SVE-NEXT: ldr q1, [x0, #224]
|
; CHECK-NEXT: ret
|
||||||
; NO_SVE-NEXT: ldr q2, [x0, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q3, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: ldr q4, [x0, #176]
|
|
||||||
; NO_SVE-NEXT: dup v8.2d, x8
|
|
||||||
; NO_SVE-NEXT: ldr q5, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: ldr q6, [x0, #144]
|
|
||||||
; NO_SVE-NEXT: ldr q7, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q16, [x0, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q17, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: ldr q18, [x0, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q19, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q20, [x0, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q21, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q22, [x0, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q23, [x0]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #240]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #224]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #208]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #192]
|
|
||||||
; NO_SVE-NEXT: bif v0.16b, v24.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #176]
|
|
||||||
; NO_SVE-NEXT: bif v1.16b, v25.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q29, [x1, #160]
|
|
||||||
; NO_SVE-NEXT: bif v2.16b, v26.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q30, [x1, #144]
|
|
||||||
; NO_SVE-NEXT: bif v3.16b, v27.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q31, [x1, #128]
|
|
||||||
; NO_SVE-NEXT: ldr q9, [x1, #112]
|
|
||||||
; NO_SVE-NEXT: ldr q10, [x1, #96]
|
|
||||||
; NO_SVE-NEXT: bif v4.16b, v28.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: ldr q28, [x1, #80]
|
|
||||||
; NO_SVE-NEXT: ldr q24, [x1, #64]
|
|
||||||
; NO_SVE-NEXT: ldr q25, [x1, #48]
|
|
||||||
; NO_SVE-NEXT: ldr q26, [x1, #32]
|
|
||||||
; NO_SVE-NEXT: ldr q27, [x1, #16]
|
|
||||||
; NO_SVE-NEXT: ldr q11, [x1]
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #192]
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v6.16b, v30.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v7.16b, v31.16b
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0, #160]
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q2, q1, [x0, #128]
|
|
||||||
; NO_SVE-NEXT: mov v1.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v16.16b, v9.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v17.16b, v10.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v18.16b, v28.16b
|
|
||||||
; NO_SVE-NEXT: bsl v1.16b, v19.16b, v24.16b
|
|
||||||
; NO_SVE-NEXT: mov v2.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q4, q3, [x0, #96]
|
|
||||||
; NO_SVE-NEXT: mov v3.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: mov v4.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: stp q1, q0, [x0, #64]
|
|
||||||
; NO_SVE-NEXT: mov v0.16b, v8.16b
|
|
||||||
; NO_SVE-NEXT: bsl v2.16b, v20.16b, v25.16b
|
|
||||||
; NO_SVE-NEXT: bsl v3.16b, v21.16b, v26.16b
|
|
||||||
; NO_SVE-NEXT: bsl v4.16b, v22.16b, v27.16b
|
|
||||||
; NO_SVE-NEXT: bsl v0.16b, v23.16b, v11.16b
|
|
||||||
; NO_SVE-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: stp q3, q2, [x0, #32]
|
|
||||||
; NO_SVE-NEXT: stp q0, q4, [x0]
|
|
||||||
; NO_SVE-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload
|
|
||||||
; NO_SVE-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: and w8, w2, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.d
|
|
||||||
; VBITS_GE_2048-NEXT: mov z2.d, x8
|
|
||||||
; VBITS_GE_2048-NEXT: and z2.d, z2.d, #0x1
|
|
||||||
; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.d, p1, z0.d, z1.d
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load volatile <32 x double>, <32 x double>* %a
|
%op1 = load volatile <32 x double>, <32 x double>* %a
|
||||||
%op2 = load volatile <32 x double>, <32 x double>* %b
|
%op2 = load volatile <32 x double>, <32 x double>* %b
|
||||||
%sel = select i1 %mask, <32 x double> %op1, <32 x double> %op2
|
%sel = select i1 %mask, <32 x double> %op1, <32 x double> %op2
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,26 +1,12 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) #0 {
|
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4f16:
|
; CHECK-LABEL: select_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: shl v2.4h, v2.4h, #15
|
; CHECK-NEXT: shl v2.4h, v2.4h, #15
|
||||||
|
@ -32,7 +18,7 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) #0 {
|
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v8f16:
|
; CHECK-LABEL: select_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
|
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
|
||||||
|
@ -44,7 +30,7 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
|
||||||
ret <8 x half> %sel
|
ret <8 x half> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16f16(<16 x half>* %a, <16 x half>* %b) #0 {
|
define void @select_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v16f16:
|
; CHECK-LABEL: select_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -96,44 +82,16 @@ define void @select_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64f16(<64 x half>* %a, <64 x half>* %b) #0 {
|
define void @select_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v64f16:
|
; CHECK-LABEL: select_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.h }, p0/z, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z2.h, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z1.h, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.h, p0/z, z0.h, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.h, p0/z, z3.h, z7.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.h, p3, z0.h, z6.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.h, p2, z1.h, z4.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.h, p1, z2.h, z5.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.h, p4, z3.h, z7.h
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.h, p1, z0.h, z1.h
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <64 x half>, <64 x half>* %a
|
%op1 = load <64 x half>, <64 x half>* %a
|
||||||
%op2 = load <64 x half>, <64 x half>* %b
|
%op2 = load <64 x half>, <64 x half>* %b
|
||||||
%mask = fcmp oeq <64 x half> %op1, %op2
|
%mask = fcmp oeq <64 x half> %op1, %op2
|
||||||
|
@ -142,68 +100,16 @@ define void @select_v64f16(<64 x half>* %a, <64 x half>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v128f16(<128 x half>* %a, <128 x half>* %b) #0 {
|
define void @select_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v128f16:
|
; CHECK-LABEL: select_v128f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #80
|
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: mov x12, #64
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: mov x13, #112
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #96
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.h }, p0/z, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z16.h }, p0/z, [x1, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z17.h }, p0/z, [x1, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z18.h }, p0/z, [x1, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z19.h }, p0/z, [x1, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z20.h }, p0/z, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z21.h }, p0/z, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z22.h }, p0/z, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z23.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z6.h, z17.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z5.h, z16.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.h, p0/z, z4.h, z19.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.h, p0/z, z3.h, z18.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p5.h, p0/z, z2.h, z21.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p6.h, p0/z, z1.h, z20.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p7.h, p0/z, z0.h, z22.h
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p8.h, p0/z, z7.h, z23.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.h, p7, z0.h, z22.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.h, p6, z1.h, z20.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.h, p5, z2.h, z21.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.h, p4, z3.h, z18.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z4.h, p3, z4.h, z19.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z5.h, p2, z5.h, z16.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z6.h, p1, z6.h, z17.h
|
|
||||||
; VBITS_GE_256-NEXT: sel z7.h, p8, z7.h, z23.h
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z6.h }, p0, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z5.h }, p0, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z7.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v128f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z1.h }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.h, p1, z0.h, z1.h
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <128 x half>, <128 x half>* %a
|
%op1 = load <128 x half>, <128 x half>* %a
|
||||||
%op2 = load <128 x half>, <128 x half>* %b
|
%op2 = load <128 x half>, <128 x half>* %b
|
||||||
%mask = fcmp oeq <128 x half> %op1, %op2
|
%mask = fcmp oeq <128 x half> %op1, %op2
|
||||||
|
@ -213,7 +119,7 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) #0 {
|
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v2f32:
|
; CHECK-LABEL: select_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: shl v2.2s, v2.2s, #31
|
; CHECK-NEXT: shl v2.2s, v2.2s, #31
|
||||||
|
@ -225,7 +131,7 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) #0 {
|
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4f32:
|
; CHECK-LABEL: select_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
|
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
|
||||||
|
@ -237,7 +143,7 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
|
||||||
ret <4 x float> %sel
|
ret <4 x float> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v8f32(<8 x float>* %a, <8 x float>* %b) #0 {
|
define void @select_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v8f32:
|
; CHECK-LABEL: select_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -289,44 +195,16 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32f32(<32 x float>* %a, <32 x float>* %b) #0 {
|
define void @select_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v32f32:
|
; CHECK-LABEL: select_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z2.s, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z1.s, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.s, p0/z, z0.s, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.s, p0/z, z3.s, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.s, p3, z0.s, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.s, p2, z1.s, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.s, p1, z2.s, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.s, p4, z3.s, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.s, p1, z0.s, z1.s
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%op2 = load <32 x float>, <32 x float>* %b
|
%op2 = load <32 x float>, <32 x float>* %b
|
||||||
%mask = fcmp oeq <32 x float> %op1, %op2
|
%mask = fcmp oeq <32 x float> %op1, %op2
|
||||||
|
@ -335,68 +213,16 @@ define void @select_v32f32(<32 x float>* %a, <32 x float>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64f32(<64 x float>* %a, <64 x float>* %b) #0 {
|
define void @select_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v64f32:
|
; CHECK-LABEL: select_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: mov x12, #32
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: mov x13, #56
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #48
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z16.s }, p0/z, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z17.s }, p0/z, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z18.s }, p0/z, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z19.s }, p0/z, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z20.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z21.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z22.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z23.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z6.s, z17.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z5.s, z16.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.s, p0/z, z4.s, z19.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.s, p0/z, z3.s, z18.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p5.s, p0/z, z2.s, z21.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p6.s, p0/z, z1.s, z20.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p7.s, p0/z, z0.s, z22.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p8.s, p0/z, z7.s, z23.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.s, p7, z0.s, z22.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.s, p6, z1.s, z20.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.s, p5, z2.s, z21.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.s, p4, z3.s, z18.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z4.s, p3, z4.s, z19.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z5.s, p2, z5.s, z16.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z6.s, p1, z6.s, z17.s
|
|
||||||
; VBITS_GE_256-NEXT: sel z7.s, p8, z7.s, z23.s
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.s, p1, z0.s, z1.s
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <64 x float>, <64 x float>* %a
|
%op1 = load <64 x float>, <64 x float>* %a
|
||||||
%op2 = load <64 x float>, <64 x float>* %b
|
%op2 = load <64 x float>, <64 x float>* %b
|
||||||
%mask = fcmp oeq <64 x float> %op1, %op2
|
%mask = fcmp oeq <64 x float> %op1, %op2
|
||||||
|
@ -406,7 +232,7 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> %mask) #0 {
|
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v1f64:
|
; CHECK-LABEL: select_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tst w0, #0x1
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
|
@ -419,7 +245,7 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) #0 {
|
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v2f64:
|
; CHECK-LABEL: select_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
|
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
|
||||||
|
@ -431,7 +257,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
|
||||||
ret <2 x double> %sel
|
ret <2 x double> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v4f64(<4 x double>* %a, <4 x double>* %b) #0 {
|
define void @select_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4f64:
|
; CHECK-LABEL: select_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -483,44 +309,16 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16f64(<16 x double>* %a, <16 x double>* %b) #0 {
|
define void @select_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v16f64:
|
; CHECK-LABEL: select_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z2.d, z5.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, z4.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.d, p0/z, z0.d, z6.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.d, p0/z, z3.d, z7.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.d, p3, z0.d, z6.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.d, p2, z1.d, z4.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.d, p1, z2.d, z5.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.d, p4, z3.d, z7.d
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: select_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
|
||||||
; VBITS_GE_1024-NEXT: sel z0.d, p1, z0.d, z1.d
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%op2 = load <16 x double>, <16 x double>* %b
|
%op2 = load <16 x double>, <16 x double>* %b
|
||||||
%mask = fcmp oeq <16 x double> %op1, %op2
|
%mask = fcmp oeq <16 x double> %op1, %op2
|
||||||
|
@ -529,68 +327,16 @@ define void @select_v16f64(<16 x double>* %a, <16 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32f64(<32 x double>* %a, <32 x double>* %b) #0 {
|
define void @select_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: select_v32f64:
|
; CHECK-LABEL: select_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: mov x13, #28
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x14, #24
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z16.d }, p0/z, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z17.d }, p0/z, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z18.d }, p0/z, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z19.d }, p0/z, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z20.d }, p0/z, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z21.d }, p0/z, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z22.d }, p0/z, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z23.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z6.d, z17.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z5.d, z16.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.d, p0/z, z4.d, z19.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.d, p0/z, z3.d, z18.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p5.d, p0/z, z2.d, z21.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p6.d, p0/z, z1.d, z20.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p7.d, p0/z, z0.d, z22.d
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p8.d, p0/z, z7.d, z23.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z0.d, p7, z0.d, z22.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z1.d, p6, z1.d, z20.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z2.d, p5, z2.d, z21.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z3.d, p4, z3.d, z18.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z4.d, p3, z4.d, z19.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z5.d, p2, z5.d, z16.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z6.d, p1, z6.d, z17.d
|
|
||||||
; VBITS_GE_256-NEXT: sel z7.d, p8, z7.d, z23.d
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: select_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
|
|
||||||
; VBITS_GE_2048-NEXT: sel z0.d, p1, z0.d, z1.d
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%op2 = load <32 x double>, <32 x double>* %b
|
%op2 = load <32 x double>, <32 x double>* %b
|
||||||
%mask = fcmp oeq <32 x double> %op1, %op2
|
%mask = fcmp oeq <32 x double> %op1, %op2
|
||||||
|
@ -599,4 +345,4 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+sve" uwtable }
|
attributes #0 = { "target-features"="+sve" }
|
||||||
|
|
|
@ -1,21 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -24,49 +10,66 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x half> @insertelement_v4f16(<4 x half> %op1) #0 {
|
define <4 x half> @insertelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v4f16:
|
; CHECK-LABEL: insertelement_v4f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: fmov h1, #5.00000000
|
; CHECK-NEXT: fmov h1, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
; VBITS_GE_256-NEXT: mov v0.h[3], v1.h[0]
|
; CHECK-NEXT: mov v0.h[3], v1.h[0]
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <4 x half> %op1, half 5.0, i64 3
|
%r = insertelement <4 x half> %op1, half 5.0, i64 3
|
||||||
ret <4 x half> %r
|
ret <4 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x half> @insertelement_v8f16(<8 x half> %op1) #0 {
|
define <8 x half> @insertelement_v8f16(<8 x half> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v8f16:
|
; CHECK-LABEL: insertelement_v8f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: fmov h1, #5.00000000
|
; CHECK-NEXT: fmov h1, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: mov v0.h[7], v1.h[0]
|
; CHECK-NEXT: mov v0.h[7], v1.h[0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <8 x half> %op1, half 5.0, i64 7
|
%r = insertelement <8 x half> %op1, half 5.0, i64 7
|
||||||
ret <8 x half> %r
|
ret <8 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x half> @insertelement_v16f16(<16 x half>* %a) #0 {
|
define <16 x half> @insertelement_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v16f16:
|
; CHECK-LABEL: insertelement_v16f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w9, #15
|
; CHECK-NEXT: mov w9, #15
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: fmov h2, #5.00000000
|
; CHECK-NEXT: fmov h2, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: index z3.h, #0, #1
|
; CHECK-NEXT: index z3.h, #0, #1
|
||||||
; VBITS_GE_256-NEXT: ptrue p1.h
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_256-NEXT: mov z1.h, w9
|
; CHECK-NEXT: mov z1.h, w9
|
||||||
; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, p1/m, h2
|
; CHECK-NEXT: mov z0.h, p1/m, h2
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x half>, <16 x half>* %a
|
%op1 = load <16 x half>, <16 x half>* %a
|
||||||
%r = insertelement <16 x half> %op1, half 5.0, i64 15
|
%r = insertelement <16 x half> %op1, half 5.0, i64 15
|
||||||
ret <16 x half> %r
|
ret <16 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
|
define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
|
||||||
|
; VBITS_GE_256-LABEL: insertelement_v32f16:
|
||||||
|
; VBITS_GE_256: // %bb.0:
|
||||||
|
; VBITS_GE_256-NEXT: mov x9, #16
|
||||||
|
; VBITS_GE_256-NEXT: mov w10, #15
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
|
; VBITS_GE_256-NEXT: fmov h3, #5.00000000
|
||||||
|
; VBITS_GE_256-NEXT: index z4.h, #0, #1
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p1.h
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: mov z2.h, w10
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z4.h, z2.h
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.h, p1/m, h3
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8, x9, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
; VBITS_GE_512-LABEL: insertelement_v32f16:
|
; VBITS_GE_512-LABEL: insertelement_v32f16:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: mov w9, #31
|
; VBITS_GE_512-NEXT: mov w9, #31
|
||||||
|
@ -85,88 +88,105 @@ define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
|
||||||
ret <32 x half> %r
|
ret <32 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <64 x half> @insertelement_v64f16(<64 x half>* %a) #0 {
|
define <64 x half> @insertelement_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: insertelement_v64f16:
|
; CHECK-LABEL: insertelement_v64f16:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w9, #63
|
; CHECK-NEXT: mov w9, #63
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: fmov h2, #5.00000000
|
; CHECK-NEXT: fmov h2, #5.00000000
|
||||||
; VBITS_GE_1024-NEXT: index z3.h, #0, #1
|
; CHECK-NEXT: index z3.h, #0, #1
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.h
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_1024-NEXT: mov z1.h, w9
|
; CHECK-NEXT: mov z1.h, w9
|
||||||
; VBITS_GE_1024-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
||||||
; VBITS_GE_1024-NEXT: mov z0.h, p1/m, h2
|
; CHECK-NEXT: mov z0.h, p1/m, h2
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x8]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x half>, <64 x half>* %a
|
%op1 = load <64 x half>, <64 x half>* %a
|
||||||
%r = insertelement <64 x half> %op1, half 5.0, i64 63
|
%r = insertelement <64 x half> %op1, half 5.0, i64 63
|
||||||
ret <64 x half> %r
|
ret <64 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <128 x half> @insertelement_v128f16(<128 x half>* %a) #0 {
|
define <128 x half> @insertelement_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: insertelement_v128f16:
|
; CHECK-LABEL: insertelement_v128f16:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w9, #127
|
; CHECK-NEXT: mov w9, #127
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: fmov h2, #5.00000000
|
; CHECK-NEXT: fmov h2, #5.00000000
|
||||||
; VBITS_GE_2048-NEXT: index z3.h, #0, #1
|
; CHECK-NEXT: index z3.h, #0, #1
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.h
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_2048-NEXT: mov z1.h, w9
|
; CHECK-NEXT: mov z1.h, w9
|
||||||
; VBITS_GE_2048-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
|
||||||
; VBITS_GE_2048-NEXT: mov z0.h, p1/m, h2
|
; CHECK-NEXT: mov z0.h, p1/m, h2
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x8]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <128 x half>, <128 x half>* %a
|
%op1 = load <128 x half>, <128 x half>* %a
|
||||||
%r = insertelement <128 x half> %op1, half 5.0, i64 127
|
%r = insertelement <128 x half> %op1, half 5.0, i64 127
|
||||||
ret <128 x half> %r
|
ret <128 x half> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @insertelement_v2f32(<2 x float> %op1) #0 {
|
define <2 x float> @insertelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v2f32:
|
; CHECK-LABEL: insertelement_v2f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: fmov s1, #5.00000000
|
; CHECK-NEXT: fmov s1, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
; VBITS_GE_256-NEXT: mov v0.s[1], v1.s[0]
|
; CHECK-NEXT: mov v0.s[1], v1.s[0]
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <2 x float> %op1, float 5.0, i64 1
|
%r = insertelement <2 x float> %op1, float 5.0, i64 1
|
||||||
ret <2 x float> %r
|
ret <2 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @insertelement_v4f32(<4 x float> %op1) #0 {
|
define <4 x float> @insertelement_v4f32(<4 x float> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v4f32:
|
; CHECK-LABEL: insertelement_v4f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: fmov s1, #5.00000000
|
; CHECK-NEXT: fmov s1, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: mov v0.s[3], v1.s[0]
|
; CHECK-NEXT: mov v0.s[3], v1.s[0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <4 x float> %op1, float 5.0, i64 3
|
%r = insertelement <4 x float> %op1, float 5.0, i64 3
|
||||||
ret <4 x float> %r
|
ret <4 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x float> @insertelement_v8f32(<8 x float>* %a) #0 {
|
define <8 x float> @insertelement_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v8f32:
|
; CHECK-LABEL: insertelement_v8f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w9, #7
|
; CHECK-NEXT: mov w9, #7
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: fmov s2, #5.00000000
|
; CHECK-NEXT: fmov s2, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: index z3.s, #0, #1
|
; CHECK-NEXT: index z3.s, #0, #1
|
||||||
; VBITS_GE_256-NEXT: ptrue p1.s
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_256-NEXT: mov z1.s, w9
|
; CHECK-NEXT: mov z1.s, w9
|
||||||
; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, p1/m, s2
|
; CHECK-NEXT: mov z0.s, p1/m, s2
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <8 x float>, <8 x float>* %a
|
%op1 = load <8 x float>, <8 x float>* %a
|
||||||
%r = insertelement <8 x float> %op1, float 5.0, i64 7
|
%r = insertelement <8 x float> %op1, float 5.0, i64 7
|
||||||
ret <8 x float> %r
|
ret <8 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
|
define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
|
||||||
|
; VBITS_GE_256-LABEL: insertelement_v16f32:
|
||||||
|
; VBITS_GE_256: // %bb.0:
|
||||||
|
; VBITS_GE_256-NEXT: mov x9, #8
|
||||||
|
; VBITS_GE_256-NEXT: mov w10, #7
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
|
; VBITS_GE_256-NEXT: fmov s3, #5.00000000
|
||||||
|
; VBITS_GE_256-NEXT: index z4.s, #0, #1
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p1.s
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: mov z2.s, w10
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z4.s, z2.s
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.s, p1/m, s3
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
; VBITS_GE_512-LABEL: insertelement_v16f32:
|
; VBITS_GE_512-LABEL: insertelement_v16f32:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: mov w9, #15
|
; VBITS_GE_512-NEXT: mov w9, #15
|
||||||
|
@ -185,86 +205,103 @@ define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
|
||||||
ret <16 x float> %r
|
ret <16 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x float> @insertelement_v32f32(<32 x float>* %a) #0 {
|
define <32 x float> @insertelement_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: insertelement_v32f32:
|
; CHECK-LABEL: insertelement_v32f32:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w9, #31
|
; CHECK-NEXT: mov w9, #31
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: fmov s2, #5.00000000
|
; CHECK-NEXT: fmov s2, #5.00000000
|
||||||
; VBITS_GE_1024-NEXT: index z3.s, #0, #1
|
; CHECK-NEXT: index z3.s, #0, #1
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.s
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_1024-NEXT: mov z1.s, w9
|
; CHECK-NEXT: mov z1.s, w9
|
||||||
; VBITS_GE_1024-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
||||||
; VBITS_GE_1024-NEXT: mov z0.s, p1/m, s2
|
; CHECK-NEXT: mov z0.s, p1/m, s2
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x float>, <32 x float>* %a
|
%op1 = load <32 x float>, <32 x float>* %a
|
||||||
%r = insertelement <32 x float> %op1, float 5.0, i64 31
|
%r = insertelement <32 x float> %op1, float 5.0, i64 31
|
||||||
ret <32 x float> %r
|
ret <32 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <64 x float> @insertelement_v64f32(<64 x float>* %a) #0 {
|
define <64 x float> @insertelement_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: insertelement_v64f32:
|
; CHECK-LABEL: insertelement_v64f32:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w9, #63
|
; CHECK-NEXT: mov w9, #63
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: fmov s2, #5.00000000
|
; CHECK-NEXT: fmov s2, #5.00000000
|
||||||
; VBITS_GE_2048-NEXT: index z3.s, #0, #1
|
; CHECK-NEXT: index z3.s, #0, #1
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.s
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_2048-NEXT: mov z1.s, w9
|
; CHECK-NEXT: mov z1.s, w9
|
||||||
; VBITS_GE_2048-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
|
||||||
; VBITS_GE_2048-NEXT: mov z0.s, p1/m, s2
|
; CHECK-NEXT: mov z0.s, p1/m, s2
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x float>, <64 x float>* %a
|
%op1 = load <64 x float>, <64 x float>* %a
|
||||||
%r = insertelement <64 x float> %op1, float 5.0, i64 63
|
%r = insertelement <64 x float> %op1, float 5.0, i64 63
|
||||||
ret <64 x float> %r
|
ret <64 x float> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x double> @insertelement_v1f64(<1 x double> %op1) #0 {
|
define <1 x double> @insertelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v1f64:
|
; CHECK-LABEL: insertelement_v1f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4617315517961601024
|
; CHECK-NEXT: mov x8, #4617315517961601024
|
||||||
; VBITS_GE_256-NEXT: fmov d0, x8
|
; CHECK-NEXT: fmov d0, x8
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <1 x double> %op1, double 5.0, i64 0
|
%r = insertelement <1 x double> %op1, double 5.0, i64 0
|
||||||
ret <1 x double> %r
|
ret <1 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x double> @insertelement_v2f64(<2 x double> %op1) #0 {
|
define <2 x double> @insertelement_v2f64(<2 x double> %op1) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v2f64:
|
; CHECK-LABEL: insertelement_v2f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: fmov d1, #5.00000000
|
; CHECK-NEXT: fmov d1, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: mov v0.d[1], v1.d[0]
|
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%r = insertelement <2 x double> %op1, double 5.0, i64 1
|
%r = insertelement <2 x double> %op1, double 5.0, i64 1
|
||||||
ret <2 x double> %r
|
ret <2 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x double> @insertelement_v4f64(<4 x double>* %a) #0 {
|
define <4 x double> @insertelement_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: insertelement_v4f64:
|
; CHECK-LABEL: insertelement_v4f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w9, #3
|
; CHECK-NEXT: mov w9, #3
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: fmov d2, #5.00000000
|
; CHECK-NEXT: fmov d2, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: index z3.d, #0, #1
|
; CHECK-NEXT: index z3.d, #0, #1
|
||||||
; VBITS_GE_256-NEXT: ptrue p1.d
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_256-NEXT: mov z1.d, x9
|
; CHECK-NEXT: mov z1.d, x9
|
||||||
; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, p1/m, d2
|
; CHECK-NEXT: mov z0.d, p1/m, d2
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <4 x double>, <4 x double>* %a
|
%op1 = load <4 x double>, <4 x double>* %a
|
||||||
%r = insertelement <4 x double> %op1, double 5.0, i64 3
|
%r = insertelement <4 x double> %op1, double 5.0, i64 3
|
||||||
ret <4 x double> %r
|
ret <4 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
|
define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
|
||||||
|
; VBITS_GE_256-LABEL: insertelement_v8f64:
|
||||||
|
; VBITS_GE_256: // %bb.0:
|
||||||
|
; VBITS_GE_256-NEXT: mov x9, #4
|
||||||
|
; VBITS_GE_256-NEXT: mov w10, #3
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
|
; VBITS_GE_256-NEXT: fmov d3, #5.00000000
|
||||||
|
; VBITS_GE_256-NEXT: index z4.d, #0, #1
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p1.d
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: mov z2.d, x10
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z4.d, z2.d
|
||||||
|
; VBITS_GE_256-NEXT: mov z0.d, p1/m, d3
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
; VBITS_GE_512-LABEL: insertelement_v8f64:
|
; VBITS_GE_512-LABEL: insertelement_v8f64:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: mov w9, #7
|
; VBITS_GE_512-NEXT: mov w9, #7
|
||||||
|
@ -283,39 +320,39 @@ define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
|
||||||
ret <8 x double> %r
|
ret <8 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x double> @insertelement_v16f64(<16 x double>* %a) #0 {
|
define <16 x double> @insertelement_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: insertelement_v16f64:
|
; CHECK-LABEL: insertelement_v16f64:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: mov w9, #15
|
; CHECK-NEXT: mov w9, #15
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: fmov d2, #5.00000000
|
; CHECK-NEXT: fmov d2, #5.00000000
|
||||||
; VBITS_GE_1024-NEXT: index z3.d, #0, #1
|
; CHECK-NEXT: index z3.d, #0, #1
|
||||||
; VBITS_GE_1024-NEXT: ptrue p1.d
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_1024-NEXT: mov z1.d, x9
|
; CHECK-NEXT: mov z1.d, x9
|
||||||
; VBITS_GE_1024-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
||||||
; VBITS_GE_1024-NEXT: mov z0.d, p1/m, d2
|
; CHECK-NEXT: mov z0.d, p1/m, d2
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x double>, <16 x double>* %a
|
%op1 = load <16 x double>, <16 x double>* %a
|
||||||
%r = insertelement <16 x double> %op1, double 5.0, i64 15
|
%r = insertelement <16 x double> %op1, double 5.0, i64 15
|
||||||
ret <16 x double> %r
|
ret <16 x double> %r
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x double> @insertelement_v32f64(<32 x double>* %a) #0 {
|
define <32 x double> @insertelement_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: insertelement_v32f64:
|
; CHECK-LABEL: insertelement_v32f64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: mov w9, #31
|
; CHECK-NEXT: mov w9, #31
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: fmov d2, #5.00000000
|
; CHECK-NEXT: fmov d2, #5.00000000
|
||||||
; VBITS_GE_2048-NEXT: index z3.d, #0, #1
|
; CHECK-NEXT: index z3.d, #0, #1
|
||||||
; VBITS_GE_2048-NEXT: ptrue p1.d
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_2048-NEXT: mov z1.d, x9
|
; CHECK-NEXT: mov z1.d, x9
|
||||||
; VBITS_GE_2048-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
|
||||||
; VBITS_GE_2048-NEXT: mov z0.d, p1/m, d2
|
; CHECK-NEXT: mov z0.d, p1/m, d2
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x double>, <32 x double>* %a
|
%op1 = load <32 x double>, <32 x double>* %a
|
||||||
%r = insertelement <32 x double> %op1, double 5.0, i64 31
|
%r = insertelement <32 x double> %op1, double 5.0, i64 31
|
||||||
ret <32 x double> %r
|
ret <32 x double> %r
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,58 +1,46 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: z{0-9}
|
|
||||||
|
|
||||||
;
|
;
|
||||||
; ICMP EQ
|
; ICMP EQ
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
|
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v8i8:
|
; CHECK-LABEL: icmp_eq_v8i8:
|
||||||
; CHECK: cmeq v0.8b, v0.8b, v1.8b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.8b, v0.8b, v1.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <8 x i8> %op1, %op2
|
%cmp = icmp eq <8 x i8> %op1, %op2
|
||||||
%sext = sext <8 x i1> %cmp to <8 x i8>
|
%sext = sext <8 x i1> %cmp to <8 x i8>
|
||||||
ret <8 x i8> %sext
|
ret <8 x i8> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
|
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v16i8:
|
; CHECK-LABEL: icmp_eq_v16i8:
|
||||||
; CHECK: cmeq v0.16b, v0.16b, v1.16b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <16 x i8> %op1, %op2
|
%cmp = icmp eq <16 x i8> %op1, %op2
|
||||||
%sext = sext <16 x i1> %cmp to <16 x i8>
|
%sext = sext <16 x i1> %cmp to <16 x i8>
|
||||||
ret <16 x i8> %sext
|
ret <16 x i8> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
|
define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v32i8:
|
; CHECK-LABEL: icmp_eq_v32i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
|
||||||
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i8>, <32 x i8>* %a
|
%op1 = load <32 x i8>, <32 x i8>* %a
|
||||||
%op2 = load <32 x i8>, <32 x i8>* %b
|
%op2 = load <32 x i8>, <32 x i8>* %b
|
||||||
%cmp = icmp eq <32 x i8> %op1, %op2
|
%cmp = icmp eq <32 x i8> %op1, %op2
|
||||||
|
@ -62,29 +50,31 @@ define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
|
define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v64i8:
|
; VBITS_GE_256-LABEL: icmp_eq_v64i8:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov w8, #32
|
||||||
; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
|
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
|
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z2.b
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: cmpeq p2.b, p0/z, z1.b, z3.b
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
|
; VBITS_GE_256-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
|
; VBITS_GE_256-NEXT: mov z1.b, p2/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
|
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]]
|
;
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].b, [[PG]]/z, [[OP1_LO]].b, [[OP2_LO]].b
|
; VBITS_GE_512-LABEL: icmp_eq_v64i8:
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].b, [[PG]]/z, [[OP1_HI]].b, [[OP2_HI]].b
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].b, [[CMP_LO]]/z, #-1
|
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].b, [[CMP_HI]]/z, #-1
|
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: st1b { [[SEXT_LO]].b }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
|
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <64 x i8>, <64 x i8>* %a
|
%op1 = load <64 x i8>, <64 x i8>* %a
|
||||||
%op2 = load <64 x i8>, <64 x i8>* %b
|
%op2 = load <64 x i8>, <64 x i8>* %b
|
||||||
%cmp = icmp eq <64 x i8> %op1, %op2
|
%cmp = icmp eq <64 x i8> %op1, %op2
|
||||||
|
@ -93,15 +83,16 @@ define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
|
define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v128i8:
|
; CHECK-LABEL: icmp_eq_v128i8:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
|
||||||
; VBITS_GE_1024-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <128 x i8>, <128 x i8>* %a
|
%op1 = load <128 x i8>, <128 x i8>* %a
|
||||||
%op2 = load <128 x i8>, <128 x i8>* %b
|
%op2 = load <128 x i8>, <128 x i8>* %b
|
||||||
%cmp = icmp eq <128 x i8> %op1, %op2
|
%cmp = icmp eq <128 x i8> %op1, %op2
|
||||||
|
@ -110,15 +101,16 @@ define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
|
define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v256i8:
|
; CHECK-LABEL: icmp_eq_v256i8:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
|
||||||
; VBITS_GE_2048-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <256 x i8>, <256 x i8>* %a
|
%op1 = load <256 x i8>, <256 x i8>* %a
|
||||||
%op2 = load <256 x i8>, <256 x i8>* %b
|
%op2 = load <256 x i8>, <256 x i8>* %b
|
||||||
%cmp = icmp eq <256 x i8> %op1, %op2
|
%cmp = icmp eq <256 x i8> %op1, %op2
|
||||||
|
@ -128,34 +120,37 @@ define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
|
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v4i16:
|
; CHECK-LABEL: icmp_eq_v4i16:
|
||||||
; CHECK: cmeq v0.4h, v0.4h, v1.4h
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.4h, v0.4h, v1.4h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <4 x i16> %op1, %op2
|
%cmp = icmp eq <4 x i16> %op1, %op2
|
||||||
%sext = sext <4 x i1> %cmp to <4 x i16>
|
%sext = sext <4 x i1> %cmp to <4 x i16>
|
||||||
ret <4 x i16> %sext
|
ret <4 x i16> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
|
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v8i16:
|
; CHECK-LABEL: icmp_eq_v8i16:
|
||||||
; CHECK: cmeq v0.8h, v0.8h, v1.8h
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.8h, v0.8h, v1.8h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <8 x i16> %op1, %op2
|
%cmp = icmp eq <8 x i16> %op1, %op2
|
||||||
%sext = sext <8 x i1> %cmp to <8 x i16>
|
%sext = sext <8 x i1> %cmp to <8 x i16>
|
||||||
ret <8 x i16> %sext
|
ret <8 x i16> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
|
define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v16i16:
|
; CHECK-LABEL: icmp_eq_v16i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
|
||||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x i16>, <16 x i16>* %a
|
%op1 = load <16 x i16>, <16 x i16>* %a
|
||||||
%op2 = load <16 x i16>, <16 x i16>* %b
|
%op2 = load <16 x i16>, <16 x i16>* %b
|
||||||
%cmp = icmp eq <16 x i16> %op1, %op2
|
%cmp = icmp eq <16 x i16> %op1, %op2
|
||||||
|
@ -165,29 +160,31 @@ define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
|
define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v32i16:
|
; VBITS_GE_256-LABEL: icmp_eq_v32i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, z3.h
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
|
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
|
; VBITS_GE_256-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1]
|
;
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
|
; VBITS_GE_512-LABEL: icmp_eq_v32i16:
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
|
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <32 x i16>, <32 x i16>* %a
|
%op1 = load <32 x i16>, <32 x i16>* %a
|
||||||
%op2 = load <32 x i16>, <32 x i16>* %b
|
%op2 = load <32 x i16>, <32 x i16>* %b
|
||||||
%cmp = icmp eq <32 x i16> %op1, %op2
|
%cmp = icmp eq <32 x i16> %op1, %op2
|
||||||
|
@ -196,15 +193,16 @@ define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
|
define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v64i16:
|
; CHECK-LABEL: icmp_eq_v64i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x i16>, <64 x i16>* %a
|
%op1 = load <64 x i16>, <64 x i16>* %a
|
||||||
%op2 = load <64 x i16>, <64 x i16>* %b
|
%op2 = load <64 x i16>, <64 x i16>* %b
|
||||||
%cmp = icmp eq <64 x i16> %op1, %op2
|
%cmp = icmp eq <64 x i16> %op1, %op2
|
||||||
|
@ -213,15 +211,16 @@ define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
|
define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v128i16:
|
; CHECK-LABEL: icmp_eq_v128i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <128 x i16>, <128 x i16>* %a
|
%op1 = load <128 x i16>, <128 x i16>* %a
|
||||||
%op2 = load <128 x i16>, <128 x i16>* %b
|
%op2 = load <128 x i16>, <128 x i16>* %b
|
||||||
%cmp = icmp eq <128 x i16> %op1, %op2
|
%cmp = icmp eq <128 x i16> %op1, %op2
|
||||||
|
@ -231,34 +230,37 @@ define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 {
|
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v2i32:
|
; CHECK-LABEL: icmp_eq_v2i32:
|
||||||
; CHECK: cmeq v0.2s, v0.2s, v1.2s
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.2s, v0.2s, v1.2s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <2 x i32> %op1, %op2
|
%cmp = icmp eq <2 x i32> %op1, %op2
|
||||||
%sext = sext <2 x i1> %cmp to <2 x i32>
|
%sext = sext <2 x i1> %cmp to <2 x i32>
|
||||||
ret <2 x i32> %sext
|
ret <2 x i32> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
|
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v4i32:
|
; CHECK-LABEL: icmp_eq_v4i32:
|
||||||
; CHECK: cmeq v0.4s, v0.4s, v1.4s
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <4 x i32> %op1, %op2
|
%cmp = icmp eq <4 x i32> %op1, %op2
|
||||||
%sext = sext <4 x i1> %cmp to <4 x i32>
|
%sext = sext <4 x i1> %cmp to <4 x i32>
|
||||||
ret <4 x i32> %sext
|
ret <4 x i32> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
|
define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v8i32:
|
; CHECK-LABEL: icmp_eq_v8i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
|
||||||
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <8 x i32>, <8 x i32>* %a
|
%op1 = load <8 x i32>, <8 x i32>* %a
|
||||||
%op2 = load <8 x i32>, <8 x i32>* %b
|
%op2 = load <8 x i32>, <8 x i32>* %b
|
||||||
%cmp = icmp eq <8 x i32> %op1, %op2
|
%cmp = icmp eq <8 x i32> %op1, %op2
|
||||||
|
@ -268,29 +270,31 @@ define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
|
define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v16i32:
|
; VBITS_GE_256-LABEL: icmp_eq_v16i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
|
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2]
|
;
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
|
; VBITS_GE_512-LABEL: icmp_eq_v16i32:
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <16 x i32>, <16 x i32>* %a
|
%op1 = load <16 x i32>, <16 x i32>* %a
|
||||||
%op2 = load <16 x i32>, <16 x i32>* %b
|
%op2 = load <16 x i32>, <16 x i32>* %b
|
||||||
%cmp = icmp eq <16 x i32> %op1, %op2
|
%cmp = icmp eq <16 x i32> %op1, %op2
|
||||||
|
@ -299,15 +303,16 @@ define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
|
define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v32i32:
|
; CHECK-LABEL: icmp_eq_v32i32:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i32>, <32 x i32>* %a
|
%op1 = load <32 x i32>, <32 x i32>* %a
|
||||||
%op2 = load <32 x i32>, <32 x i32>* %b
|
%op2 = load <32 x i32>, <32 x i32>* %b
|
||||||
%cmp = icmp eq <32 x i32> %op1, %op2
|
%cmp = icmp eq <32 x i32> %op1, %op2
|
||||||
|
@ -316,15 +321,16 @@ define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
|
define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v64i32:
|
; CHECK-LABEL: icmp_eq_v64i32:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <64 x i32>, <64 x i32>* %a
|
%op1 = load <64 x i32>, <64 x i32>* %a
|
||||||
%op2 = load <64 x i32>, <64 x i32>* %b
|
%op2 = load <64 x i32>, <64 x i32>* %b
|
||||||
%cmp = icmp eq <64 x i32> %op1, %op2
|
%cmp = icmp eq <64 x i32> %op1, %op2
|
||||||
|
@ -334,34 +340,37 @@ define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
|
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v1i64:
|
; CHECK-LABEL: icmp_eq_v1i64:
|
||||||
; CHECK: cmeq d0, d0, d1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq d0, d0, d1
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <1 x i64> %op1, %op2
|
%cmp = icmp eq <1 x i64> %op1, %op2
|
||||||
%sext = sext <1 x i1> %cmp to <1 x i64>
|
%sext = sext <1 x i1> %cmp to <1 x i64>
|
||||||
ret <1 x i64> %sext
|
ret <1 x i64> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
|
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v2i64:
|
; CHECK-LABEL: icmp_eq_v2i64:
|
||||||
; CHECK: cmeq v0.2d, v0.2d, v1.2d
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%cmp = icmp eq <2 x i64> %op1, %op2
|
%cmp = icmp eq <2 x i64> %op1, %op2
|
||||||
%sext = sext <2 x i1> %cmp to <2 x i64>
|
%sext = sext <2 x i1> %cmp to <2 x i64>
|
||||||
ret <2 x i64> %sext
|
ret <2 x i64> %sext
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
|
define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v4i64:
|
; CHECK-LABEL: icmp_eq_v4i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
|
||||||
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <4 x i64>, <4 x i64>* %a
|
%op1 = load <4 x i64>, <4 x i64>* %a
|
||||||
%op2 = load <4 x i64>, <4 x i64>* %b
|
%op2 = load <4 x i64>, <4 x i64>* %b
|
||||||
%cmp = icmp eq <4 x i64> %op1, %op2
|
%cmp = icmp eq <4 x i64> %op1, %op2
|
||||||
|
@ -371,29 +380,31 @@ define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
|
define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v8i64:
|
; VBITS_GE_256-LABEL: icmp_eq_v8i64:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3]
|
;
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
|
; VBITS_GE_512-LABEL: icmp_eq_v8i64:
|
||||||
; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load <8 x i64>, <8 x i64>* %a
|
%op1 = load <8 x i64>, <8 x i64>* %a
|
||||||
%op2 = load <8 x i64>, <8 x i64>* %b
|
%op2 = load <8 x i64>, <8 x i64>* %b
|
||||||
%cmp = icmp eq <8 x i64> %op1, %op2
|
%cmp = icmp eq <8 x i64> %op1, %op2
|
||||||
|
@ -402,15 +413,16 @@ define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
|
define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v16i64:
|
; CHECK-LABEL: icmp_eq_v16i64:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x i64>, <16 x i64>* %a
|
%op1 = load <16 x i64>, <16 x i64>* %a
|
||||||
%op2 = load <16 x i64>, <16 x i64>* %b
|
%op2 = load <16 x i64>, <16 x i64>* %b
|
||||||
%cmp = icmp eq <16 x i64> %op1, %op2
|
%cmp = icmp eq <16 x i64> %op1, %op2
|
||||||
|
@ -419,15 +431,16 @@ define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
|
define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: icmp_eq_v32i64:
|
; CHECK-LABEL: icmp_eq_v32i64:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: cmpeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i64>, <32 x i64>* %a
|
%op1 = load <32 x i64>, <32 x i64>* %a
|
||||||
%op2 = load <32 x i64>, <32 x i64>* %b
|
%op2 = load <32 x i64>, <32 x i64>* %b
|
||||||
%cmp = icmp eq <32 x i64> %op1, %op2
|
%cmp = icmp eq <32 x i64> %op1, %op2
|
||||||
|
@ -440,15 +453,16 @@ define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
|
||||||
; ICMP NE
|
; ICMP NE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
|
define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_ne_v32i8:
|
; CHECK-LABEL: icmp_ne_v32i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpne [[CMP:p[0-9]+]].b, [[PG]]/z, [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].b, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
|
||||||
; CHECK-NEXT: st1b { [[SEXT]].b }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i8>, <32 x i8>* %a
|
%op1 = load <32 x i8>, <32 x i8>* %a
|
||||||
%op2 = load <32 x i8>, <32 x i8>* %b
|
%op2 = load <32 x i8>, <32 x i8>* %b
|
||||||
%cmp = icmp ne <32 x i8> %op1, %op2
|
%cmp = icmp ne <32 x i8> %op1, %op2
|
||||||
|
@ -461,15 +475,16 @@ define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
|
||||||
; ICMP SGE
|
; ICMP SGE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
|
define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(4,0) #0 {
|
||||||
; CHECK-LABEL: icmp_sge_v32i16:
|
; CHECK-LABEL: icmp_sge_v32i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.h
|
||||||
; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_512-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i16>, <32 x i16>* %a
|
%op1 = load <32 x i16>, <32 x i16>* %a
|
||||||
%op2 = load <32 x i16>, <32 x i16>* %b
|
%op2 = load <32 x i16>, <32 x i16>* %b
|
||||||
%cmp = icmp sge <32 x i16> %op1, %op2
|
%cmp = icmp sge <32 x i16> %op1, %op2
|
||||||
|
@ -482,15 +497,16 @@ define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
|
||||||
; ICMP SGT
|
; ICMP SGT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
|
define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_sgt_v16i16:
|
; CHECK-LABEL: icmp_sgt_v16i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.h
|
||||||
; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x i16>, <16 x i16>* %a
|
%op1 = load <16 x i16>, <16 x i16>* %a
|
||||||
%op2 = load <16 x i16>, <16 x i16>* %b
|
%op2 = load <16 x i16>, <16 x i16>* %b
|
||||||
%cmp = icmp sgt <16 x i16> %op1, %op2
|
%cmp = icmp sgt <16 x i16> %op1, %op2
|
||||||
|
@ -503,15 +519,16 @@ define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
|
||||||
; ICMP SLE
|
; ICMP SLE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
|
define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) vscale_range(4,0) #0 {
|
||||||
; CHECK-LABEL: icmp_sle_v16i32:
|
; CHECK-LABEL: icmp_sle_v16i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpge [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpge p1.s, p0/z, z1.s, z0.s
|
||||||
; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_512-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x i32>, <16 x i32>* %a
|
%op1 = load <16 x i32>, <16 x i32>* %a
|
||||||
%op2 = load <16 x i32>, <16 x i32>* %b
|
%op2 = load <16 x i32>, <16 x i32>* %b
|
||||||
%cmp = icmp sle <16 x i32> %op1, %op2
|
%cmp = icmp sle <16 x i32> %op1, %op2
|
||||||
|
@ -524,15 +541,16 @@ define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
|
||||||
; ICMP SLT
|
; ICMP SLT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
|
define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_slt_v8i32:
|
; CHECK-LABEL: icmp_slt_v8i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmpgt [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP2]].s, [[OP1]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmpgt p1.s, p0/z, z1.s, z0.s
|
||||||
; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <8 x i32>, <8 x i32>* %a
|
%op1 = load <8 x i32>, <8 x i32>* %a
|
||||||
%op2 = load <8 x i32>, <8 x i32>* %b
|
%op2 = load <8 x i32>, <8 x i32>* %b
|
||||||
%cmp = icmp slt <8 x i32> %op1, %op2
|
%cmp = icmp slt <8 x i32> %op1, %op2
|
||||||
|
@ -545,15 +563,16 @@ define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
|
||||||
; ICMP UGE
|
; ICMP UGE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
|
define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) vscale_range(4,0) #0 {
|
||||||
; CHECK-LABEL: icmp_uge_v8i64:
|
; CHECK-LABEL: icmp_uge_v8i64:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmphs p1.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_512-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <8 x i64>, <8 x i64>* %a
|
%op1 = load <8 x i64>, <8 x i64>* %a
|
||||||
%op2 = load <8 x i64>, <8 x i64>* %b
|
%op2 = load <8 x i64>, <8 x i64>* %b
|
||||||
%cmp = icmp uge <8 x i64> %op1, %op2
|
%cmp = icmp uge <8 x i64> %op1, %op2
|
||||||
|
@ -566,15 +585,16 @@ define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
|
||||||
; ICMP UGT
|
; ICMP UGT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
|
define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: icmp_ugt_v4i64:
|
; CHECK-LABEL: icmp_ugt_v4i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
|
||||||
; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <4 x i64>, <4 x i64>* %a
|
%op1 = load <4 x i64>, <4 x i64>* %a
|
||||||
%op2 = load <4 x i64>, <4 x i64>* %b
|
%op2 = load <4 x i64>, <4 x i64>* %b
|
||||||
%cmp = icmp ugt <4 x i64> %op1, %op2
|
%cmp = icmp ugt <4 x i64> %op1, %op2
|
||||||
|
@ -587,15 +607,16 @@ define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
|
||||||
; ICMP ULE
|
; ICMP ULE
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
|
define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: icmp_ule_v16i64:
|
; CHECK-LABEL: icmp_ule_v16i64:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: cmphs [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmphs p1.d, p0/z, z1.d, z0.d
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <16 x i64>, <16 x i64>* %a
|
%op1 = load <16 x i64>, <16 x i64>* %a
|
||||||
%op2 = load <16 x i64>, <16 x i64>* %b
|
%op2 = load <16 x i64>, <16 x i64>* %b
|
||||||
%cmp = icmp ule <16 x i64> %op1, %op2
|
%cmp = icmp ule <16 x i64> %op1, %op2
|
||||||
|
@ -608,15 +629,16 @@ define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 {
|
||||||
; ICMP ULT
|
; ICMP ULT
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @icmp_ult_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
|
define void @icmp_ult_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: icmp_ult_v32i64:
|
; CHECK-LABEL: icmp_ult_v32i64:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: cmphi [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP2]].d, [[OP1]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
|
; CHECK-NEXT: cmphi p1.d, p0/z, z1.d, z0.d
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x0]
|
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load <32 x i64>, <32 x i64>* %a
|
%op1 = load <32 x i64>, <32 x i64>* %a
|
||||||
%op2 = load <32 x i64>, <32 x i64>* %b
|
%op2 = load <32 x i64>, <32 x i64>* %b
|
||||||
%cmp = icmp ult <32 x i64> %op1, %op2
|
%cmp = icmp ult <32 x i64> %op1, %op2
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,62 +1,50 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) #0 {
|
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v8i8:
|
; CHECK-LABEL: select_v8i8:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.8b, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
; CHECK-NEXT: dup v2.8b, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
|
%sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
|
||||||
ret <8 x i8> %sel
|
ret <8 x i8> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) #0 {
|
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v16i8:
|
; CHECK-LABEL: select_v16i8:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.16b, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
; CHECK-NEXT: dup v2.16b, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
|
%sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
|
||||||
ret <16 x i8> %sel
|
ret <16 x i8> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, i1 %mask) #0 {
|
define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v32i8:
|
; CHECK-LABEL: select_v32i8:
|
||||||
; CHECK: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].b, vl[[#min(VBYTES,32)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; CHECK-NEXT: ld1b { [[OP1:z[0-9]+]].b }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; CHECK-NEXT: ld1b { [[OP2:z[0-9]+]].b }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[TMP1:z[0-9]+]].b, w[[AND]]
|
; CHECK-NEXT: ptrue p1.b
|
||||||
; CHECK-NEXT: and [[TMP2:z[0-9]+]].b, [[TMP1]].b, #0x1
|
; CHECK-NEXT: mov z2.b, w8
|
||||||
; CHECK-NEXT: cmpne [[PRES:p[0-9]+]].b, [[PG2]]/z, [[TMP2]].b, #0
|
; CHECK-NEXT: and z2.b, z2.b, #0x1
|
||||||
; CHECK-NEXT: sel [[RES:z[0-9]+]].b, [[PRES]], [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
|
||||||
; CHECK-NEXT: st1b { [[RES]].b }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <32 x i8>, <32 x i8>* %a
|
%op1 = load volatile <32 x i8>, <32 x i8>* %a
|
||||||
%op2 = load volatile <32 x i8>, <32 x i8>* %b
|
%op2 = load volatile <32 x i8>, <32 x i8>* %b
|
||||||
%sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
|
%sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
|
||||||
|
@ -65,18 +53,38 @@ define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, i1 %mask) #0 {
|
define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, i1 %mask) #0 {
|
||||||
; CHECK-LABEL: select_v64i8:
|
; VBITS_GE_256-LABEL: select_v64i8:
|
||||||
; VBITS_GE_512: and w[[AND:[0-9]+]], w2, #0x1
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].b, vl[[#min(VBYTES,64)]]
|
; VBITS_GE_256-NEXT: mov w8, #32
|
||||||
; VBITS_GE_512-NEXT: ld1b { [[OP1:z[0-9]+]].b }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; VBITS_GE_512-NEXT: ld1b { [[OP2:z[0-9]+]].b }, [[PG1]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].b
|
; VBITS_GE_256-NEXT: ptrue p1.b
|
||||||
; VBITS_GE_512-NEXT: mov [[TMP1:z[0-9]+]].b, w[[AND]]
|
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
||||||
; VBITS_GE_512-NEXT: and [[TMP2:z[0-9]+]].b, [[TMP1]].b, #0x1
|
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpne [[PRES:p[0-9]+]].b, [[PG2]]/z, [[TMP2]].b, #0
|
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
|
||||||
; VBITS_GE_512-NEXT: sel [[RES:z[0-9]+]].b, [[PRES]], [[OP1]].b, [[OP2]].b
|
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG1]], [x0]
|
; VBITS_GE_256-NEXT: mov z4.b, w9
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1
|
||||||
|
; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0
|
||||||
|
; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b
|
||||||
|
; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b
|
||||||
|
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
||||||
|
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: select_v64i8:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: and w8, w2, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
|
||||||
|
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p1.b
|
||||||
|
; VBITS_GE_512-NEXT: mov z2.b, w8
|
||||||
|
; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0
|
||||||
|
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load volatile <64 x i8>, <64 x i8>* %a
|
%op1 = load volatile <64 x i8>, <64 x i8>* %a
|
||||||
%op2 = load volatile <64 x i8>, <64 x i8>* %b
|
%op2 = load volatile <64 x i8>, <64 x i8>* %b
|
||||||
%sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
|
%sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
|
||||||
|
@ -84,19 +92,20 @@ define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, i1 %mask) #0 {
|
define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: select_v128i8:
|
; CHECK-LABEL: select_v128i8:
|
||||||
; VBITS_GE_1024: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].b, vl[[#min(VBYTES,128)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_1024-NEXT: ld1b { [[OP1:z[0-9]+]].b }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_1024-NEXT: ld1b { [[OP2:z[0-9]+]].b }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[TMP1:z[0-9]+]].b, w[[AND]]
|
; CHECK-NEXT: ptrue p1.b
|
||||||
; VBITS_GE_1024-NEXT: and [[TMP2:z[0-9]+]].b, [[TMP1]].b, #0x1
|
; CHECK-NEXT: mov z2.b, w8
|
||||||
; VBITS_GE_1024-NEXT: cmpne [[PRES:p[0-9]+]].b, [[PG2]]/z, [[TMP2]].b, #0
|
; CHECK-NEXT: and z2.b, z2.b, #0x1
|
||||||
; VBITS_GE_1024-NEXT: sel [[RES:z[0-9]+]].b, [[PRES]], [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
|
||||||
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <128 x i8>, <128 x i8>* %a
|
%op1 = load volatile <128 x i8>, <128 x i8>* %a
|
||||||
%op2 = load volatile <128 x i8>, <128 x i8>* %b
|
%op2 = load volatile <128 x i8>, <128 x i8>* %b
|
||||||
%sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
|
%sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
|
||||||
|
@ -104,19 +113,20 @@ define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, i1 %mask) #0 {
|
define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: select_v256i8:
|
; CHECK-LABEL: select_v256i8:
|
||||||
; VBITS_GE_2048: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].b, vl[[#min(VBYTES,256)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_2048-NEXT: ld1b { [[OP1:z[0-9]+]].b }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_2048-NEXT: ld1b { [[OP2:z[0-9]+]].b }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].b
|
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[TMP1:z[0-9]+]].b, w[[AND]]
|
; CHECK-NEXT: ptrue p1.b
|
||||||
; VBITS_GE_2048-NEXT: and [[TMP2:z[0-9]+]].b, [[TMP1]].b, #0x1
|
; CHECK-NEXT: mov z2.b, w8
|
||||||
; VBITS_GE_2048-NEXT: cmpne [[PRES:p[0-9]+]].b, [[PG2]]/z, [[TMP2]].b, #0
|
; CHECK-NEXT: and z2.b, z2.b, #0x1
|
||||||
; VBITS_GE_2048-NEXT: sel [[RES:z[0-9]+]].b, [[PRES]], [[OP1]].b, [[OP2]].b
|
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
|
||||||
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <256 x i8>, <256 x i8>* %a
|
%op1 = load volatile <256 x i8>, <256 x i8>* %a
|
||||||
%op2 = load volatile <256 x i8>, <256 x i8>* %b
|
%op2 = load volatile <256 x i8>, <256 x i8>* %b
|
||||||
%sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
|
%sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
|
||||||
|
@ -125,42 +135,45 @@ define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) #0 {
|
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4i16:
|
; CHECK-LABEL: select_v4i16:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.4h, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
; CHECK-NEXT: dup v2.4h, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
|
%sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
|
||||||
ret <4 x i16> %sel
|
ret <4 x i16> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) #0 {
|
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v8i16:
|
; CHECK-LABEL: select_v8i16:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.8h, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
; CHECK-NEXT: dup v2.8h, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
|
%sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
|
||||||
ret <8 x i16> %sel
|
ret <8 x i16> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, i1 %mask) #0 {
|
define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v16i16:
|
; CHECK-LABEL: select_v16i16:
|
||||||
; CHECK: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; CHECK-NEXT: ld1h { [[OP1:z[0-9]+]].h }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: ld1h { [[OP2:z[0-9]+]].h }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[TMP1:z[0-9]+]].h, w[[AND]]
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; CHECK-NEXT: and [[TMP2:z[0-9]+]].h, [[TMP1]].h, #0x1
|
; CHECK-NEXT: mov z2.h, w8
|
||||||
; CHECK-NEXT: cmpne [[PRES:p[0-9]+]].h, [[PG2]]/z, [[TMP2]].h, #0
|
; CHECK-NEXT: and z2.h, z2.h, #0x1
|
||||||
; CHECK-NEXT: sel [[RES:z[0-9]+]].h, [[PRES]], [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
; CHECK-NEXT: st1h { [[RES]].h }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <16 x i16>, <16 x i16>* %a
|
%op1 = load volatile <16 x i16>, <16 x i16>* %a
|
||||||
%op2 = load volatile <16 x i16>, <16 x i16>* %b
|
%op2 = load volatile <16 x i16>, <16 x i16>* %b
|
||||||
%sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
|
%sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
|
||||||
|
@ -169,18 +182,38 @@ define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, i1 %mask) #0 {
|
define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, i1 %mask) #0 {
|
||||||
; CHECK-LABEL: select_v32i16:
|
; VBITS_GE_256-LABEL: select_v32i16:
|
||||||
; VBITS_GE_512: and w[[AND:[0-9]+]], w2, #0x1
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[OP1:z[0-9]+]].h }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[OP2:z[0-9]+]].h }, [[PG1]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].h
|
; VBITS_GE_256-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_512-NEXT: mov [[TMP1:z[0-9]+]].h, w[[AND]]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: and [[TMP2:z[0-9]+]].h, [[TMP1]].h, #0x1
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpne [[PRES:p[0-9]+]].h, [[PG2]]/z, [[TMP2]].h, #0
|
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: sel [[RES:z[0-9]+]].h, [[PRES]], [[OP1]].h, [[OP2]].h
|
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG1]], [x0]
|
; VBITS_GE_256-NEXT: mov z4.h, w9
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
|
||||||
|
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
|
||||||
|
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
|
||||||
|
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: select_v32i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: and w8, w2, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p1.h
|
||||||
|
; VBITS_GE_512-NEXT: mov z2.h, w8
|
||||||
|
; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
|
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load volatile <32 x i16>, <32 x i16>* %a
|
%op1 = load volatile <32 x i16>, <32 x i16>* %a
|
||||||
%op2 = load volatile <32 x i16>, <32 x i16>* %b
|
%op2 = load volatile <32 x i16>, <32 x i16>* %b
|
||||||
%sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
|
%sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
|
||||||
|
@ -188,19 +221,20 @@ define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, i1 %mask) #0 {
|
define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: select_v64i16:
|
; CHECK-LABEL: select_v64i16:
|
||||||
; VBITS_GE_1024: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_1024-NEXT: ld1h { [[OP1:z[0-9]+]].h }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: ld1h { [[OP2:z[0-9]+]].h }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[TMP1:z[0-9]+]].h, w[[AND]]
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_1024-NEXT: and [[TMP2:z[0-9]+]].h, [[TMP1]].h, #0x1
|
; CHECK-NEXT: mov z2.h, w8
|
||||||
; VBITS_GE_1024-NEXT: cmpne [[PRES:p[0-9]+]].h, [[PG2]]/z, [[TMP2]].h, #0
|
; CHECK-NEXT: and z2.h, z2.h, #0x1
|
||||||
; VBITS_GE_1024-NEXT: sel [[RES:z[0-9]+]].h, [[PRES]], [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <64 x i16>, <64 x i16>* %a
|
%op1 = load volatile <64 x i16>, <64 x i16>* %a
|
||||||
%op2 = load volatile <64 x i16>, <64 x i16>* %b
|
%op2 = load volatile <64 x i16>, <64 x i16>* %b
|
||||||
%sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
|
%sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
|
||||||
|
@ -208,19 +242,20 @@ define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, i1 %mask) #0 {
|
define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: select_v128i16:
|
; CHECK-LABEL: select_v128i16:
|
||||||
; VBITS_GE_2048: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_2048-NEXT: ld1h { [[OP1:z[0-9]+]].h }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: ld1h { [[OP2:z[0-9]+]].h }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].h
|
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[TMP1:z[0-9]+]].h, w[[AND]]
|
; CHECK-NEXT: ptrue p1.h
|
||||||
; VBITS_GE_2048-NEXT: and [[TMP2:z[0-9]+]].h, [[TMP1]].h, #0x1
|
; CHECK-NEXT: mov z2.h, w8
|
||||||
; VBITS_GE_2048-NEXT: cmpne [[PRES:p[0-9]+]].h, [[PG2]]/z, [[TMP2]].h, #0
|
; CHECK-NEXT: and z2.h, z2.h, #0x1
|
||||||
; VBITS_GE_2048-NEXT: sel [[RES:z[0-9]+]].h, [[PRES]], [[OP1]].h, [[OP2]].h
|
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
|
||||||
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <128 x i16>, <128 x i16>* %a
|
%op1 = load volatile <128 x i16>, <128 x i16>* %a
|
||||||
%op2 = load volatile <128 x i16>, <128 x i16>* %b
|
%op2 = load volatile <128 x i16>, <128 x i16>* %b
|
||||||
%sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
|
%sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
|
||||||
|
@ -229,42 +264,45 @@ define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) #0 {
|
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v2i32:
|
; CHECK-LABEL: select_v2i32:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.2s, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
; CHECK-NEXT: dup v2.2s, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
|
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
|
||||||
ret <2 x i32> %sel
|
ret <2 x i32> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) #0 {
|
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4i32:
|
; CHECK-LABEL: select_v4i32:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm w8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.4s, w8
|
; CHECK-NEXT: csetm w8, ne
|
||||||
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
; CHECK-NEXT: dup v2.4s, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
|
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
|
||||||
ret <4 x i32> %sel
|
ret <4 x i32> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) #0 {
|
define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v8i32:
|
; CHECK-LABEL: select_v8i32:
|
||||||
; CHECK: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; CHECK-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: ld1w { [[OP2:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[TMP1:z[0-9]+]].s, w[[AND]]
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; CHECK-NEXT: and [[TMP2:z[0-9]+]].s, [[TMP1]].s, #0x1
|
; CHECK-NEXT: mov z2.s, w8
|
||||||
; CHECK-NEXT: cmpne [[PRES:p[0-9]+]].s, [[PG2]]/z, [[TMP2]].s, #0
|
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||||
; CHECK-NEXT: sel [[RES:z[0-9]+]].s, [[PRES]], [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
; CHECK-NEXT: st1w { [[RES]].s }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <8 x i32>, <8 x i32>* %a
|
%op1 = load volatile <8 x i32>, <8 x i32>* %a
|
||||||
%op2 = load volatile <8 x i32>, <8 x i32>* %b
|
%op2 = load volatile <8 x i32>, <8 x i32>* %b
|
||||||
%sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
|
%sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
|
||||||
|
@ -273,18 +311,38 @@ define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
|
define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
|
||||||
; CHECK-LABEL: select_v16i32:
|
; VBITS_GE_256-LABEL: select_v16i32:
|
||||||
; VBITS_GE_512: and w[[AND:[0-9]+]], w2, #0x1
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[OP2:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s
|
; VBITS_GE_256-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_512-NEXT: mov [[TMP1:z[0-9]+]].s, w[[AND]]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: and [[TMP2:z[0-9]+]].s, [[TMP1]].s, #0x1
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpne [[PRES:p[0-9]+]].s, [[PG2]]/z, [[TMP2]].s, #0
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: sel [[RES:z[0-9]+]].s, [[PRES]], [[OP1]].s, [[OP2]].s
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG1]], [x0]
|
; VBITS_GE_256-NEXT: mov z4.s, w9
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
|
||||||
|
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
|
||||||
|
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
|
||||||
|
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: select_v16i32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: and w8, w2, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p1.s
|
||||||
|
; VBITS_GE_512-NEXT: mov z2.s, w8
|
||||||
|
; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
|
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load volatile <16 x i32>, <16 x i32>* %a
|
%op1 = load volatile <16 x i32>, <16 x i32>* %a
|
||||||
%op2 = load volatile <16 x i32>, <16 x i32>* %b
|
%op2 = load volatile <16 x i32>, <16 x i32>* %b
|
||||||
%sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
|
%sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
|
||||||
|
@ -292,19 +350,20 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) #0 {
|
define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: select_v32i32:
|
; CHECK-LABEL: select_v32i32:
|
||||||
; VBITS_GE_1024: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_1024-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: ld1w { [[OP2:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[TMP1:z[0-9]+]].s, w[[AND]]
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_1024-NEXT: and [[TMP2:z[0-9]+]].s, [[TMP1]].s, #0x1
|
; CHECK-NEXT: mov z2.s, w8
|
||||||
; VBITS_GE_1024-NEXT: cmpne [[PRES:p[0-9]+]].s, [[PG2]]/z, [[TMP2]].s, #0
|
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||||
; VBITS_GE_1024-NEXT: sel [[RES:z[0-9]+]].s, [[PRES]], [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <32 x i32>, <32 x i32>* %a
|
%op1 = load volatile <32 x i32>, <32 x i32>* %a
|
||||||
%op2 = load volatile <32 x i32>, <32 x i32>* %b
|
%op2 = load volatile <32 x i32>, <32 x i32>* %b
|
||||||
%sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
|
%sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
|
||||||
|
@ -312,19 +371,20 @@ define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) #0 {
|
define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: select_v64i32:
|
; CHECK-LABEL: select_v64i32:
|
||||||
; VBITS_GE_2048: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_2048-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: ld1w { [[OP2:z[0-9]+]].s }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[TMP1:z[0-9]+]].s, w[[AND]]
|
; CHECK-NEXT: ptrue p1.s
|
||||||
; VBITS_GE_2048-NEXT: and [[TMP2:z[0-9]+]].s, [[TMP1]].s, #0x1
|
; CHECK-NEXT: mov z2.s, w8
|
||||||
; VBITS_GE_2048-NEXT: cmpne [[PRES:p[0-9]+]].s, [[PG2]]/z, [[TMP2]].s, #0
|
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||||
; VBITS_GE_2048-NEXT: sel [[RES:z[0-9]+]].s, [[PRES]], [[OP1]].s, [[OP2]].s
|
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
|
||||||
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <64 x i32>, <64 x i32>* %a
|
%op1 = load volatile <64 x i32>, <64 x i32>* %a
|
||||||
%op2 = load volatile <64 x i32>, <64 x i32>* %b
|
%op2 = load volatile <64 x i32>, <64 x i32>* %b
|
||||||
%sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
|
%sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
|
||||||
|
@ -333,42 +393,45 @@ define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) #0 {
|
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v1i64:
|
; CHECK-LABEL: select_v1i64:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm x8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: fmov d2, x8
|
; CHECK-NEXT: csetm x8, ne
|
||||||
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
; CHECK-NEXT: fmov d2, x8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
|
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
|
||||||
ret <1 x i64> %sel
|
ret <1 x i64> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) #0 {
|
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v2i64:
|
; CHECK-LABEL: select_v2i64:
|
||||||
; CHECK: tst w0, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: csetm x8, ne
|
; CHECK-NEXT: tst w0, #0x1
|
||||||
; CHECK-NEXT: dup v2.2d, x8
|
; CHECK-NEXT: csetm x8, ne
|
||||||
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
; CHECK-NEXT: dup v2.2d, x8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
|
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
|
||||||
ret <2 x i64> %sel
|
ret <2 x i64> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) #0 {
|
define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: select_v4i64:
|
; CHECK-LABEL: select_v4i64:
|
||||||
; CHECK: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; CHECK-NEXT: ld1d { [[OP1:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: ld1d { [[OP2:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; CHECK-NEXT: mov [[TMP1:z[0-9]+]].d, x[[AND]]
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; CHECK-NEXT: and [[TMP2:z[0-9]+]].d, [[TMP1]].d, #0x1
|
; CHECK-NEXT: mov z2.d, x8
|
||||||
; CHECK-NEXT: cmpne [[PRES:p[0-9]+]].d, [[PG2]]/z, [[TMP2]].d, #0
|
; CHECK-NEXT: and z2.d, z2.d, #0x1
|
||||||
; CHECK-NEXT: sel [[RES:z[0-9]+]].d, [[PRES]], [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
; CHECK-NEXT: st1d { [[RES]].d }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <4 x i64>, <4 x i64>* %a
|
%op1 = load volatile <4 x i64>, <4 x i64>* %a
|
||||||
%op2 = load volatile <4 x i64>, <4 x i64>* %b
|
%op2 = load volatile <4 x i64>, <4 x i64>* %b
|
||||||
%sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
|
%sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
|
||||||
|
@ -377,18 +440,38 @@ define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
|
define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
|
||||||
; CHECK-LABEL: select_v8i64:
|
; VBITS_GE_256-LABEL: select_v8i64:
|
||||||
; VBITS_GE_512: and w[[AND:[0-9]+]], w2, #0x1
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[OP1:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: and w9, w2, #0x1
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[OP2:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
|
; VBITS_GE_256-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_512-NEXT: mov [[TMP1:z[0-9]+]].d, x[[AND]]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: and [[TMP2:z[0-9]+]].d, [[TMP1]].d, #0x1
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: cmpne [[PRES:p[0-9]+]].d, [[PG2]]/z, [[TMP2]].d, #0
|
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: sel [[RES:z[0-9]+]].d, [[PRES]], [[OP1]].d, [[OP2]].d
|
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
|
||||||
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG1]], [x0]
|
; VBITS_GE_256-NEXT: mov z4.d, x9
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
|
||||||
|
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
|
||||||
|
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
|
||||||
|
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: select_v8i64:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: and w8, w2, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p1.d
|
||||||
|
; VBITS_GE_512-NEXT: mov z2.d, x8
|
||||||
|
; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
|
||||||
|
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
|
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op1 = load volatile <8 x i64>, <8 x i64>* %a
|
%op1 = load volatile <8 x i64>, <8 x i64>* %a
|
||||||
%op2 = load volatile <8 x i64>, <8 x i64>* %b
|
%op2 = load volatile <8 x i64>, <8 x i64>* %b
|
||||||
%sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
|
%sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
|
||||||
|
@ -396,19 +479,20 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) #0 {
|
define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: select_v16i64:
|
; CHECK-LABEL: select_v16i64:
|
||||||
; VBITS_GE_1024: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[OP1:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[OP2:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: mov [[TMP1:z[0-9]+]].d, x[[AND]]
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_1024-NEXT: and [[TMP2:z[0-9]+]].d, [[TMP1]].d, #0x1
|
; CHECK-NEXT: mov z2.d, x8
|
||||||
; VBITS_GE_1024-NEXT: cmpne [[PRES:p[0-9]+]].d, [[PG2]]/z, [[TMP2]].d, #0
|
; CHECK-NEXT: and z2.d, z2.d, #0x1
|
||||||
; VBITS_GE_1024-NEXT: sel [[RES:z[0-9]+]].d, [[PRES]], [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <16 x i64>, <16 x i64>* %a
|
%op1 = load volatile <16 x i64>, <16 x i64>* %a
|
||||||
%op2 = load volatile <16 x i64>, <16 x i64>* %b
|
%op2 = load volatile <16 x i64>, <16 x i64>* %b
|
||||||
%sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
|
%sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
|
||||||
|
@ -416,19 +500,20 @@ define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, i1 %mask) #0 {
|
define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, i1 %mask) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: select_v32i64:
|
; CHECK-LABEL: select_v32i64:
|
||||||
; VBITS_GE_2048: and w[[AND:[0-9]+]], w2, #0x1
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]]
|
; CHECK-NEXT: and w8, w2, #0x1
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[OP1:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[OP2:z[0-9]+]].d }, [[PG1]]/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: mov [[TMP1:z[0-9]+]].d, x[[AND]]
|
; CHECK-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_2048-NEXT: and [[TMP2:z[0-9]+]].d, [[TMP1]].d, #0x1
|
; CHECK-NEXT: mov z2.d, x8
|
||||||
; VBITS_GE_2048-NEXT: cmpne [[PRES:p[0-9]+]].d, [[PG2]]/z, [[TMP2]].d, #0
|
; CHECK-NEXT: and z2.d, z2.d, #0x1
|
||||||
; VBITS_GE_2048-NEXT: sel [[RES:z[0-9]+]].d, [[PRES]], [[OP1]].d, [[OP2]].d
|
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x0]
|
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op1 = load volatile <32 x i64>, <32 x i64>* %a
|
%op1 = load volatile <32 x i64>, <32 x i64>* %a
|
||||||
%op2 = load volatile <32 x i64>, <32 x i64>* %b
|
%op2 = load volatile <32 x i64>, <32 x i64>* %b
|
||||||
%sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
|
%sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,35 +1,29 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048
|
||||||
; VBYTES represents the useful byte size of a vector register from the code
|
|
||||||
; generator's point of view. It is clamped to power-of-2 values because
|
|
||||||
; only power-of-2 vector lengths are considered legal, regardless of the
|
|
||||||
; user specified vector length.
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
|
define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v2f32:
|
; CHECK-LABEL: load_v2f32:
|
||||||
; CHECK: ldr d0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: ret
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load <2 x float>, <2 x float>* %a
|
%load = load <2 x float>, <2 x float>* %a
|
||||||
ret <2 x float> %load
|
ret <2 x float> %load
|
||||||
}
|
}
|
||||||
|
@ -37,66 +31,164 @@ define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @load_v4f32(<4 x float>* %a) #0 {
|
define <4 x float> @load_v4f32(<4 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v4f32:
|
; CHECK-LABEL: load_v4f32:
|
||||||
; CHECK: ldr q0, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: ret
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load <4 x float>, <4 x float>* %a
|
%load = load <4 x float>, <4 x float>* %a
|
||||||
ret <4 x float> %load
|
ret <4 x float> %load
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x float> @load_v8f32(<8 x float>* %a) #0 {
|
define <8 x float> @load_v8f32(<8 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v8f32:
|
; CHECK-LABEL: load_v8f32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK: ret
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%load = load <8 x float>, <8 x float>* %a
|
%load = load <8 x float>, <8 x float>* %a
|
||||||
ret <8 x float> %load
|
ret <8 x float> %load
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
|
define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v16f32:
|
; VBITS_GE_256-LABEL: load_v16f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x9, #8
|
||||||
; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: load_v16f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: load_v16f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: load_v16f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
%load = load <16 x float>, <16 x float>* %a
|
%load = load <16 x float>, <16 x float>* %a
|
||||||
ret <16 x float> %load
|
ret <16 x float> %load
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
|
define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v32f32:
|
; VBITS_GE_256-LABEL: load_v32f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x9, #16
|
||||||
; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: mov x10, #24
|
||||||
; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x11, #8
|
||||||
; VBITS_LE_256-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: load_v32f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: mov x9, #16
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: load_v32f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: load_v32f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
%load = load <32 x float>, <32 x float>* %a
|
%load = load <32 x float>, <32 x float>* %a
|
||||||
ret <32 x float> %load
|
ret <32 x float> %load
|
||||||
}
|
}
|
||||||
|
|
||||||
define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
|
define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: load_v64f32:
|
; VBITS_GE_256-LABEL: load_v64f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x9, #8
|
||||||
; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: mov x10, #48
|
||||||
; VBITS_LE_1024-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x11, #56
|
||||||
; VBITS_LE_512-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
|
; VBITS_GE_256-NEXT: mov x12, #32
|
||||||
; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x13, #40
|
||||||
; VBITS_LE_512-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
|
; VBITS_GE_256-NEXT: mov x14, #16
|
||||||
; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x15, #24
|
||||||
; VBITS_LE_256-DAG: mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A4]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A5:[0-9]+]], #[[#mul(div(VBYTES,4),5)]]
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x11, lsl #2]
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A5]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x12, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]]
|
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x13, lsl #2]
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A6]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x14, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]]
|
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x15, lsl #2]
|
||||||
; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A7]], lsl #2]
|
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x9, lsl #2]
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x13, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x12, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x15, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x14, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x8]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: load_v64f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: mov x9, #32
|
||||||
|
; VBITS_GE_512-NEXT: mov x10, #48
|
||||||
|
; VBITS_GE_512-NEXT: mov x11, #16
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z3.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z3.s }, p0, [x8]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: load_v64f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: mov x9, #32
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z1.s }, p0, [x8]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: load_v64f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
||||||
|
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
%load = load <64 x float>, <64 x float>* %a
|
%load = load <64 x float>, <64 x float>* %a
|
||||||
ret <64 x float> %load
|
ret <64 x float> %load
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +1,7 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -20,7 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
; LD1B
|
; LD1B
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 {
|
define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v2i8:
|
; CHECK-LABEL: masked_gather_v2i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x1]
|
; CHECK-NEXT: ldr q0, [x1]
|
||||||
|
@ -36,7 +25,7 @@ define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 {
|
define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v4i8:
|
; CHECK-LABEL: masked_gather_v4i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -54,21 +43,21 @@ define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
|
define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
|
||||||
; VBITS_EQ_256-LABEL: masked_gather_v8i8:
|
; VBITS_GE_256-LABEL: masked_gather_v8i8:
|
||||||
; VBITS_EQ_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
; VBITS_GE_256-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_EQ_256-NEXT: ld1b { z1.d }, p0/z, [z1.d]
|
; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z1.d]
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
; VBITS_EQ_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b
|
; VBITS_GE_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b
|
||||||
; VBITS_EQ_256-NEXT: str d0, [x0]
|
; VBITS_GE_256-NEXT: str d0, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: masked_gather_v8i8:
|
; VBITS_GE_512-LABEL: masked_gather_v8i8:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -86,17 +75,17 @@ define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) #0 {
|
define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: masked_gather_v16i8:
|
; CHECK-LABEL: masked_gather_v16i8:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.b, z0.b, z0.b
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
; VBITS_GE_1024-NEXT: str q0, [x0]
|
; CHECK-NEXT: str q0, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <16 x i8*>, <16 x i8*>* %b
|
%ptrs = load <16 x i8*>, <16 x i8*>* %b
|
||||||
%vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
|
||||||
|
@ -104,18 +93,18 @@ define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) #0 {
|
define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: masked_gather_v32i8:
|
; CHECK-LABEL: masked_gather_v32i8:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.b, z0.b, z0.b
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0]
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <32 x i8*>, <32 x i8*>* %b
|
%ptrs = load <32 x i8*>, <32 x i8*>* %b
|
||||||
%vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
|
@ -129,7 +118,7 @@ define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) #0 {
|
||||||
; LD1H
|
; LD1H
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 {
|
define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v2i16:
|
; CHECK-LABEL: masked_gather_v2i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x1]
|
; CHECK-NEXT: ldr q0, [x1]
|
||||||
|
@ -145,7 +134,7 @@ define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) #0 {
|
define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v4i16:
|
; CHECK-LABEL: masked_gather_v4i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -162,21 +151,21 @@ define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
|
define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
|
||||||
; VBITS_EQ_256-LABEL: masked_gather_v8i16:
|
; VBITS_GE_256-LABEL: masked_gather_v8i16:
|
||||||
; VBITS_EQ_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_EQ_256-NEXT: ld1h { z1.d }, p0/z, [z1.d]
|
; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z1.d]
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
; VBITS_EQ_256-NEXT: mov v1.d[1], v0.d[0]
|
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
|
||||||
; VBITS_EQ_256-NEXT: str q1, [x0]
|
; VBITS_GE_256-NEXT: str q1, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: masked_gather_v8i16:
|
; VBITS_GE_512-LABEL: masked_gather_v8i16:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -193,17 +182,17 @@ define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) #0 {
|
define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: masked_gather_v16i16:
|
; CHECK-LABEL: masked_gather_v16i16:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <16 x i16*>, <16 x i16*>* %b
|
%ptrs = load <16 x i16*>, <16 x i16*>* %b
|
||||||
%vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
|
||||||
|
@ -211,17 +200,17 @@ define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) #0 {
|
define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: masked_gather_v32i16:
|
; CHECK-LABEL: masked_gather_v32i16:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl32
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <32 x i16*>, <32 x i16*>* %b
|
%ptrs = load <32 x i16*>, <32 x i16*>* %b
|
||||||
%vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
|
@ -235,7 +224,7 @@ define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) #0 {
|
||||||
; LD1W
|
; LD1W
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) #0 {
|
define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v2i32:
|
; CHECK-LABEL: masked_gather_v2i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x1]
|
; CHECK-NEXT: ldr q0, [x1]
|
||||||
|
@ -250,7 +239,7 @@ define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) #0 {
|
define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v4i32:
|
; CHECK-LABEL: masked_gather_v4i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -266,21 +255,21 @@ define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
|
define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
|
||||||
; VBITS_EQ_256-LABEL: masked_gather_v8i32:
|
; VBITS_GE_256-LABEL: masked_gather_v8i32:
|
||||||
; VBITS_EQ_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_EQ_256-NEXT: ld1w { z1.d }, p0/z, [z1.d]
|
; VBITS_GE_256-NEXT: ld1w { z1.d }, p0/z, [z1.d]
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.s, vl4
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: masked_gather_v8i32:
|
; VBITS_GE_512-LABEL: masked_gather_v8i32:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -297,16 +286,16 @@ define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) #0 {
|
define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: masked_gather_v16i32:
|
; CHECK-LABEL: masked_gather_v16i32:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl16
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <16 x i32*>, <16 x i32*>* %b
|
%ptrs = load <16 x i32*>, <16 x i32*>* %b
|
||||||
%vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
|
||||||
|
@ -314,16 +303,16 @@ define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) #0 {
|
define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: masked_gather_v32i32:
|
; CHECK-LABEL: masked_gather_v32i32:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl32
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <32 x i32*>, <32 x i32*>* %b
|
%ptrs = load <32 x i32*>, <32 x i32*>* %b
|
||||||
%vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
|
@ -337,7 +326,7 @@ define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) #0 {
|
||||||
; LD1D
|
; LD1D
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) #0 {
|
define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v2i64:
|
; CHECK-LABEL: masked_gather_v2i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x1]
|
; CHECK-NEXT: ldr q0, [x1]
|
||||||
|
@ -351,7 +340,7 @@ define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) #0 {
|
define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_gather_v4i64:
|
; CHECK-LABEL: masked_gather_v4i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -366,17 +355,17 @@ define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
|
define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
|
||||||
; VBITS_EQ_256-LABEL: masked_gather_v8i64:
|
; VBITS_GE_256-LABEL: masked_gather_v8i64:
|
||||||
; VBITS_EQ_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: mov x8, #4
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [z1.d]
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [z1.d]
|
||||||
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
;
|
||||||
; VBITS_GE_512-LABEL: masked_gather_v8i64:
|
; VBITS_GE_512-LABEL: masked_gather_v8i64:
|
||||||
; VBITS_GE_512: // %bb.0:
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
@ -391,14 +380,14 @@ define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) #0 {
|
define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_1024-LABEL: masked_gather_v16i64:
|
; CHECK-LABEL: masked_gather_v16i64:
|
||||||
; VBITS_GE_1024: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <16 x i64*>, <16 x i64*>* %b
|
%ptrs = load <16 x i64*>, <16 x i64*>* %b
|
||||||
%vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
|
||||||
|
@ -406,14 +395,14 @@ define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_gather_v32i64(<32 x i64>* %a, <32 x i64*>* %b) #0 {
|
define void @masked_gather_v32i64(<32 x i64>* %a, <32 x i64*>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_2048-LABEL: masked_gather_v32i64:
|
; CHECK-LABEL: masked_gather_v32i64:
|
||||||
; VBITS_GE_2048: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%ptrs = load <32 x i64*>, <32 x i64*>* %b
|
%ptrs = load <32 x i64*>, <32 x i64*>* %b
|
||||||
%vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
%vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,28 +1,15 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
;;
|
;
|
||||||
;; Masked Stores
|
; Masked Stores
|
||||||
;;
|
;
|
||||||
define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
|
|
||||||
|
define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_store_v2f16:
|
; CHECK-LABEL: masked_store_v2f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr s1, [x0]
|
; CHECK-NEXT: ldr s1, [x0]
|
||||||
|
@ -52,8 +39,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) vscale_range(2,0) #0 {
|
||||||
define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) #0 {
|
|
||||||
; CHECK-LABEL: masked_store_v2f32:
|
; CHECK-LABEL: masked_store_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr d0, [x0]
|
; CHECK-NEXT: ldr d0, [x0]
|
||||||
|
@ -70,7 +56,7 @@ define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) #0 {
|
define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_store_v4f32:
|
; CHECK-LABEL: masked_store_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x0]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
@ -87,7 +73,7 @@ define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) #0 {
|
define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: masked_store_v8f32:
|
; CHECK-LABEL: masked_store_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -133,39 +119,15 @@ define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) #0 {
|
define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: masked_store_v32f32:
|
; CHECK-LABEL: masked_store_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z2.s, z5.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z1.s, z4.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.s, p0/z, z0.s, z6.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p0.s, p0/z, z3.s, z7.s
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p3, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p2, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p1, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: masked_store_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <32 x float>, <32 x float>* %ap
|
%a = load <32 x float>, <32 x float>* %ap
|
||||||
%b = load <32 x float>, <32 x float>* %bp
|
%b = load <32 x float>, <32 x float>* %bp
|
||||||
%mask = fcmp oeq <32 x float> %a, %b
|
%mask = fcmp oeq <32 x float> %a, %b
|
||||||
|
@ -173,59 +135,15 @@ define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) #0 {
|
define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: masked_store_v64f32:
|
; CHECK-LABEL: masked_store_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #56
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #40
|
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #32
|
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x13, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x14, #8
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z16.s }, p0/z, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z17.s }, p0/z, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z18.s }, p0/z, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z19.s }, p0/z, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z20.s }, p0/z, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z21.s }, p0/z, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z22.s }, p0/z, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z23.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z6.s, z17.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z5.s, z16.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p3.s, p0/z, z4.s, z19.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p4.s, p0/z, z3.s, z18.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p5.s, p0/z, z2.s, z21.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p6.s, p0/z, z1.s, z20.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p7.s, p0/z, z0.s, z22.s
|
|
||||||
; VBITS_GE_256-NEXT: fcmeq p0.s, p0/z, z7.s, z23.s
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p7, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p6, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p5, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p4, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p3, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p2, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p1, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: masked_store_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%a = load <64 x float>, <64 x float>* %ap
|
%a = load <64 x float>, <64 x float>* %ap
|
||||||
%b = load <64 x float>, <64 x float>* %bp
|
%b = load <64 x float>, <64 x float>* %bp
|
||||||
%mask = fcmp oeq <64 x float> %a, %b
|
%mask = fcmp oeq <64 x float> %a, %b
|
||||||
|
@ -266,7 +184,6 @@ define void @masked_store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i8>
|
||||||
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
|
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
|
||||||
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x2]
|
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x2]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
%a = load <8 x i64>, <8 x i64>* %ap
|
%a = load <8 x i64>, <8 x i64>* %ap
|
||||||
%b = load <8 x i64>, <8 x i64>* %bp
|
%b = load <8 x i64>, <8 x i64>* %bp
|
||||||
%mask = icmp eq <8 x i64> %a, %b
|
%mask = icmp eq <8 x i64> %a, %b
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -163,27 +163,27 @@ define void @test_revwv8i32v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
|
||||||
|
|
||||||
; REVH pattern for shuffle v32i16 with 256 bits and 512 bits SVE.
|
; REVH pattern for shuffle v32i16 with 256 bits and 512 bits SVE.
|
||||||
define void @test_revhv32i16(<32 x i16>* %a) #0 {
|
define void @test_revhv32i16(<32 x i16>* %a) #0 {
|
||||||
; VBITS_EQ_256-LABEL: test_revhv32i16:
|
|
||||||
; VBITS_EQ_256: // %bb.0:
|
|
||||||
; VBITS_EQ_256-NEXT: mov x8, #16
|
|
||||||
; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
|
|
||||||
; VBITS_EQ_256-NEXT: ptrue p1.d
|
|
||||||
; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
||||||
; VBITS_EQ_256-NEXT: revh z0.d, p1/m, z0.d
|
|
||||||
; VBITS_EQ_256-NEXT: revh z1.d, p1/m, z1.d
|
|
||||||
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
|
|
||||||
; VBITS_EQ_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_256-LABEL: test_revhv32i16:
|
; VBITS_GE_256-LABEL: test_revhv32i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl32
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_256-NEXT: ptrue p1.d
|
; VBITS_GE_256-NEXT: ptrue p1.d
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: revh z0.d, p1/m, z0.d
|
; VBITS_GE_256-NEXT: revh z0.d, p1/m, z0.d
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
|
; VBITS_GE_256-NEXT: revh z1.d, p1/m, z1.d
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: test_revhv32i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p1.d
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: revh z0.d, p1/m, z0.d
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%tmp1 = load <32 x i16>, <32 x i16>* %a
|
%tmp1 = load <32 x i16>, <32 x i16>* %a
|
||||||
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
|
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
|
||||||
store <32 x i16> %tmp2, <32 x i16>* %a
|
store <32 x i16> %tmp2, <32 x i16>* %a
|
||||||
|
|
|
@ -1,54 +1,46 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
;
|
;
|
||||||
; RBIT
|
; RBIT
|
||||||
;
|
;
|
||||||
|
|
||||||
define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) #0 {
|
define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v8i8:
|
; CHECK-LABEL: bitreverse_v8i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].b, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.b, [[PG]]/m, z0.b
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.b, vl8
|
||||||
|
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
|
%res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
|
||||||
ret <8 x i8> %res
|
ret <8 x i8> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) #0 {
|
define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v16i8:
|
; CHECK-LABEL: bitreverse_v16i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].b, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.b, [[PG]]/m, z0.b
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.b, vl16
|
||||||
|
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
|
%res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
|
||||||
ret <16 x i8> %res
|
ret <16 x i8> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v32i8(<32 x i8>* %a) #0 {
|
define void @bitreverse_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v32i8:
|
; CHECK-LABEL: bitreverse_v32i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; CHECK-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <32 x i8>, <32 x i8>* %a
|
%op = load <32 x i8>, <32 x i8>* %a
|
||||||
%res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op)
|
%res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op)
|
||||||
store <32 x i8> %res, <32 x i8>* %a
|
store <32 x i8> %res, <32 x i8>* %a
|
||||||
|
@ -56,80 +48,91 @@ define void @bitreverse_v32i8(<32 x i8>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v64i8(<64 x i8>* %a) #0 {
|
define void @bitreverse_v64i8(<64 x i8>* %a) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v64i8:
|
; VBITS_GE_256-LABEL: bitreverse_v64i8:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov w8, #32
|
||||||
; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
|
; VBITS_GE_256-NEXT: rbit z1.b, p0/m, z1.b
|
||||||
|
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
||||||
|
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
;
|
;
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_512-LABEL: bitreverse_v64i8:
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
|
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
|
; VBITS_GE_512-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
|
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
|
; VBITS_GE_512-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
|
|
||||||
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
|
|
||||||
; VBITS_EQ_256-NEXT: ret
|
|
||||||
%op = load <64 x i8>, <64 x i8>* %a
|
%op = load <64 x i8>, <64 x i8>* %a
|
||||||
%res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op)
|
%res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op)
|
||||||
store <64 x i8> %res, <64 x i8>* %a
|
store <64 x i8> %res, <64 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v128i8(<128 x i8>* %a) #0 {
|
define void @bitreverse_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v128i8:
|
; CHECK-LABEL: bitreverse_v128i8:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <128 x i8>, <128 x i8>* %a
|
%op = load <128 x i8>, <128 x i8>* %a
|
||||||
%res = call <128 x i8> @llvm.bitreverse.v128i8(<128 x i8> %op)
|
%res = call <128 x i8> @llvm.bitreverse.v128i8(<128 x i8> %op)
|
||||||
store <128 x i8> %res, <128 x i8>* %a
|
store <128 x i8> %res, <128 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v256i8(<256 x i8>* %a) #0 {
|
define void @bitreverse_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v256i8:
|
; CHECK-LABEL: bitreverse_v256i8:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <256 x i8>, <256 x i8>* %a
|
%op = load <256 x i8>, <256 x i8>* %a
|
||||||
%res = call <256 x i8> @llvm.bitreverse.v256i8(<256 x i8> %op)
|
%res = call <256 x i8> @llvm.bitreverse.v256i8(<256 x i8> %op)
|
||||||
store <256 x i8> %res, <256 x i8>* %a
|
store <256 x i8> %res, <256 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) #0 {
|
define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v4i16:
|
; CHECK-LABEL: bitreverse_v4i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.h, [[PG]]/m, z0.h
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.h, vl4
|
||||||
|
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
|
%res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
|
||||||
ret <4 x i16> %res
|
ret <4 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) #0 {
|
define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v8i16:
|
; CHECK-LABEL: bitreverse_v8i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.h, [[PG]]/m, z0.h
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.h, vl8
|
||||||
|
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
|
%res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v16i16(<16 x i16>* %a) #0 {
|
define void @bitreverse_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v16i16:
|
; CHECK-LABEL: bitreverse_v16i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <16 x i16>, <16 x i16>* %a
|
%op = load <16 x i16>, <16 x i16>* %a
|
||||||
%res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op)
|
%res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op)
|
||||||
store <16 x i16> %res, <16 x i16>* %a
|
store <16 x i16> %res, <16 x i16>* %a
|
||||||
|
@ -137,80 +140,91 @@ define void @bitreverse_v16i16(<16 x i16>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v32i16(<32 x i16>* %a) #0 {
|
define void @bitreverse_v32i16(<32 x i16>* %a) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v32i16:
|
; VBITS_GE_256-LABEL: bitreverse_v32i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: rbit z1.h, p0/m, z1.h
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
|
;
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
|
; VBITS_GE_512-LABEL: bitreverse_v32i16:
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <32 x i16>, <32 x i16>* %a
|
%op = load <32 x i16>, <32 x i16>* %a
|
||||||
%res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op)
|
%res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op)
|
||||||
store <32 x i16> %res, <32 x i16>* %a
|
store <32 x i16> %res, <32 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v64i16(<64 x i16>* %a) #0 {
|
define void @bitreverse_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v64i16:
|
; CHECK-LABEL: bitreverse_v64i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <64 x i16>, <64 x i16>* %a
|
%op = load <64 x i16>, <64 x i16>* %a
|
||||||
%res = call <64 x i16> @llvm.bitreverse.v64i16(<64 x i16> %op)
|
%res = call <64 x i16> @llvm.bitreverse.v64i16(<64 x i16> %op)
|
||||||
store <64 x i16> %res, <64 x i16>* %a
|
store <64 x i16> %res, <64 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v128i16(<128 x i16>* %a) #0 {
|
define void @bitreverse_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v128i16:
|
; CHECK-LABEL: bitreverse_v128i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <128 x i16>, <128 x i16>* %a
|
%op = load <128 x i16>, <128 x i16>* %a
|
||||||
%res = call <128 x i16> @llvm.bitreverse.v128i16(<128 x i16> %op)
|
%res = call <128 x i16> @llvm.bitreverse.v128i16(<128 x i16> %op)
|
||||||
store <128 x i16> %res, <128 x i16>* %a
|
store <128 x i16> %res, <128 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) #0 {
|
define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v2i32:
|
; CHECK-LABEL: bitreverse_v2i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl2
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.s, [[PG]]/m, z0.s
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.s, vl2
|
||||||
|
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
|
%res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
|
||||||
ret <2 x i32> %res
|
ret <2 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) #0 {
|
define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v4i32:
|
; CHECK-LABEL: bitreverse_v4i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.s, [[PG]]/m, z0.s
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.s, vl4
|
||||||
|
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
|
%res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
|
||||||
ret <4 x i32> %res
|
ret <4 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v8i32(<8 x i32>* %a) #0 {
|
define void @bitreverse_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v8i32:
|
; CHECK-LABEL: bitreverse_v8i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <8 x i32>, <8 x i32>* %a
|
%op = load <8 x i32>, <8 x i32>* %a
|
||||||
%res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op)
|
%res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op)
|
||||||
store <8 x i32> %res, <8 x i32>* %a
|
store <8 x i32> %res, <8 x i32>* %a
|
||||||
|
@ -218,80 +232,91 @@ define void @bitreverse_v8i32(<8 x i32>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v16i32(<16 x i32>* %a) #0 {
|
define void @bitreverse_v16i32(<16 x i32>* %a) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v16i32:
|
; VBITS_GE_256-LABEL: bitreverse_v16i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: rbit z1.s, p0/m, z1.s
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
|
;
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
|
; VBITS_GE_512-LABEL: bitreverse_v16i32:
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <16 x i32>, <16 x i32>* %a
|
%op = load <16 x i32>, <16 x i32>* %a
|
||||||
%res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op)
|
%res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op)
|
||||||
store <16 x i32> %res, <16 x i32>* %a
|
store <16 x i32> %res, <16 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v32i32(<32 x i32>* %a) #0 {
|
define void @bitreverse_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v32i32:
|
; CHECK-LABEL: bitreverse_v32i32:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <32 x i32>, <32 x i32>* %a
|
%op = load <32 x i32>, <32 x i32>* %a
|
||||||
%res = call <32 x i32> @llvm.bitreverse.v32i32(<32 x i32> %op)
|
%res = call <32 x i32> @llvm.bitreverse.v32i32(<32 x i32> %op)
|
||||||
store <32 x i32> %res, <32 x i32>* %a
|
store <32 x i32> %res, <32 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v64i32(<64 x i32>* %a) #0 {
|
define void @bitreverse_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v64i32:
|
; CHECK-LABEL: bitreverse_v64i32:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <64 x i32>, <64 x i32>* %a
|
%op = load <64 x i32>, <64 x i32>* %a
|
||||||
%res = call <64 x i32> @llvm.bitreverse.v64i32(<64 x i32> %op)
|
%res = call <64 x i32> @llvm.bitreverse.v64i32(<64 x i32> %op)
|
||||||
store <64 x i32> %res, <64 x i32>* %a
|
store <64 x i32> %res, <64 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) #0 {
|
define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v1i64:
|
; CHECK-LABEL: bitreverse_v1i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.d, [[PG]]/m, z0.d
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.d, vl1
|
||||||
|
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
|
%res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
|
||||||
ret <1 x i64> %res
|
ret <1 x i64> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) #0 {
|
define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v2i64:
|
; CHECK-LABEL: bitreverse_v2i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rbit z0.d, [[PG]]/m, z0.d
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ptrue p0.d, vl2
|
||||||
|
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
|
%res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
|
||||||
ret <2 x i64> %res
|
ret <2 x i64> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v4i64(<4 x i64>* %a) #0 {
|
define void @bitreverse_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v4i64:
|
; CHECK-LABEL: bitreverse_v4i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <4 x i64>, <4 x i64>* %a
|
%op = load <4 x i64>, <4 x i64>* %a
|
||||||
%res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op)
|
%res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op)
|
||||||
store <4 x i64> %res, <4 x i64>* %a
|
store <4 x i64> %res, <4 x i64>* %a
|
||||||
|
@ -299,49 +324,53 @@ define void @bitreverse_v4i64(<4 x i64>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v8i64(<8 x i64>* %a) #0 {
|
define void @bitreverse_v8i64(<8 x i64>* %a) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v8i64:
|
; VBITS_GE_256-LABEL: bitreverse_v8i64:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: rbit z1.d, p0/m, z1.d
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
|
;
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
|
; VBITS_GE_512-LABEL: bitreverse_v8i64:
|
||||||
; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <8 x i64>, <8 x i64>* %a
|
%op = load <8 x i64>, <8 x i64>* %a
|
||||||
%res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op)
|
%res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op)
|
||||||
store <8 x i64> %res, <8 x i64>* %a
|
store <8 x i64> %res, <8 x i64>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v16i64(<16 x i64>* %a) #0 {
|
define void @bitreverse_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v16i64:
|
; CHECK-LABEL: bitreverse_v16i64:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <16 x i64>, <16 x i64>* %a
|
%op = load <16 x i64>, <16 x i64>* %a
|
||||||
%res = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> %op)
|
%res = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> %op)
|
||||||
store <16 x i64> %res, <16 x i64>* %a
|
store <16 x i64> %res, <16 x i64>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bitreverse_v32i64(<32 x i64>* %a) #0 {
|
define void @bitreverse_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bitreverse_v32i64:
|
; CHECK-LABEL: bitreverse_v32i64:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: rbit [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <32 x i64>, <32 x i64>* %a
|
%op = load <32 x i64>, <32 x i64>* %a
|
||||||
%res = call <32 x i64> @llvm.bitreverse.v32i64(<32 x i64> %op)
|
%res = call <32 x i64> @llvm.bitreverse.v32i64(<32 x i64> %op)
|
||||||
store <32 x i64> %res, <32 x i64>* %a
|
store <32 x i64> %res, <32 x i64>* %a
|
||||||
|
@ -353,30 +382,33 @@ define void @bitreverse_v32i64(<32 x i64>* %a) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 {
|
define <4 x i16> @bswap_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v4i16:
|
; CHECK-LABEL: bswap_v4i16:
|
||||||
; CHECK: rev16 v0.8b, v0.8b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev16 v0.8b, v0.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
|
%res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
|
||||||
ret <4 x i16> %res
|
ret <4 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 {
|
define <8 x i16> @bswap_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v8i16:
|
; CHECK-LABEL: bswap_v8i16:
|
||||||
; CHECK: rev16 v0.16b, v0.16b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev16 v0.16b, v0.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
|
%res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v16i16(<16 x i16>* %a) #0 {
|
define void @bswap_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v16i16:
|
; CHECK-LABEL: bswap_v16i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.h, p0/m, z0.h
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <16 x i16>, <16 x i16>* %a
|
%op = load <16 x i16>, <16 x i16>* %a
|
||||||
%res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
|
%res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
|
||||||
store <16 x i16> %res, <16 x i16>* %a
|
store <16 x i16> %res, <16 x i16>* %a
|
||||||
|
@ -384,49 +416,53 @@ define void @bswap_v16i16(<16 x i16>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v32i16(<32 x i16>* %a) #0 {
|
define void @bswap_v32i16(<32 x i16>* %a) #0 {
|
||||||
; CHECK-LABEL: bswap_v32i16:
|
; VBITS_GE_256-LABEL: bswap_v32i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: revb z0.h, p0/m, z0.h
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: revb z1.h, p0/m, z1.h
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
|
;
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
|
; VBITS_GE_512-LABEL: bswap_v32i16:
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: revb z0.h, p0/m, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <32 x i16>, <32 x i16>* %a
|
%op = load <32 x i16>, <32 x i16>* %a
|
||||||
%res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op)
|
%res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op)
|
||||||
store <32 x i16> %res, <32 x i16>* %a
|
store <32 x i16> %res, <32 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v64i16(<64 x i16>* %a) #0 {
|
define void @bswap_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v64i16:
|
; CHECK-LABEL: bswap_v64i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.h, p0/m, z0.h
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <64 x i16>, <64 x i16>* %a
|
%op = load <64 x i16>, <64 x i16>* %a
|
||||||
%res = call <64 x i16> @llvm.bswap.v64i16(<64 x i16> %op)
|
%res = call <64 x i16> @llvm.bswap.v64i16(<64 x i16> %op)
|
||||||
store <64 x i16> %res, <64 x i16>* %a
|
store <64 x i16> %res, <64 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v128i16(<128 x i16>* %a) #0 {
|
define void @bswap_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v128i16:
|
; CHECK-LABEL: bswap_v128i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.h, p0/m, z0.h
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <128 x i16>, <128 x i16>* %a
|
%op = load <128 x i16>, <128 x i16>* %a
|
||||||
%res = call <128 x i16> @llvm.bswap.v128i16(<128 x i16> %op)
|
%res = call <128 x i16> @llvm.bswap.v128i16(<128 x i16> %op)
|
||||||
store <128 x i16> %res, <128 x i16>* %a
|
store <128 x i16> %res, <128 x i16>* %a
|
||||||
|
@ -434,30 +470,33 @@ define void @bswap_v128i16(<128 x i16>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 {
|
define <2 x i32> @bswap_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v2i32:
|
; CHECK-LABEL: bswap_v2i32:
|
||||||
; CHECK: rev32 v0.8b, v0.8b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev32 v0.8b, v0.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
|
%res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
|
||||||
ret <2 x i32> %res
|
ret <2 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
|
define <4 x i32> @bswap_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v4i32:
|
; CHECK-LABEL: bswap_v4i32:
|
||||||
; CHECK: rev32 v0.16b, v0.16b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev32 v0.16b, v0.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
|
%res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
|
||||||
ret <4 x i32> %res
|
ret <4 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v8i32(<8 x i32>* %a) #0 {
|
define void @bswap_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v8i32:
|
; CHECK-LABEL: bswap_v8i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.s, p0/m, z0.s
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <8 x i32>, <8 x i32>* %a
|
%op = load <8 x i32>, <8 x i32>* %a
|
||||||
%res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
|
%res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
|
||||||
store <8 x i32> %res, <8 x i32>* %a
|
store <8 x i32> %res, <8 x i32>* %a
|
||||||
|
@ -465,49 +504,53 @@ define void @bswap_v8i32(<8 x i32>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v16i32(<16 x i32>* %a) #0 {
|
define void @bswap_v16i32(<16 x i32>* %a) #0 {
|
||||||
; CHECK-LABEL: bswap_v16i32:
|
; VBITS_GE_256-LABEL: bswap_v16i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: revb z0.s, p0/m, z0.s
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: revb z1.s, p0/m, z1.s
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
|
;
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
|
; VBITS_GE_512-LABEL: bswap_v16i32:
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: revb z0.s, p0/m, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <16 x i32>, <16 x i32>* %a
|
%op = load <16 x i32>, <16 x i32>* %a
|
||||||
%res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op)
|
%res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op)
|
||||||
store <16 x i32> %res, <16 x i32>* %a
|
store <16 x i32> %res, <16 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v32i32(<32 x i32>* %a) #0 {
|
define void @bswap_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v32i32:
|
; CHECK-LABEL: bswap_v32i32:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.s, p0/m, z0.s
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <32 x i32>, <32 x i32>* %a
|
%op = load <32 x i32>, <32 x i32>* %a
|
||||||
%res = call <32 x i32> @llvm.bswap.v32i32(<32 x i32> %op)
|
%res = call <32 x i32> @llvm.bswap.v32i32(<32 x i32> %op)
|
||||||
store <32 x i32> %res, <32 x i32>* %a
|
store <32 x i32> %res, <32 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v64i32(<64 x i32>* %a) #0 {
|
define void @bswap_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v64i32:
|
; CHECK-LABEL: bswap_v64i32:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.s, p0/m, z0.s
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <64 x i32>, <64 x i32>* %a
|
%op = load <64 x i32>, <64 x i32>* %a
|
||||||
%res = call <64 x i32> @llvm.bswap.v64i32(<64 x i32> %op)
|
%res = call <64 x i32> @llvm.bswap.v64i32(<64 x i32> %op)
|
||||||
store <64 x i32> %res, <64 x i32>* %a
|
store <64 x i32> %res, <64 x i32>* %a
|
||||||
|
@ -515,30 +558,33 @@ define void @bswap_v64i32(<64 x i32>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
|
define <1 x i64> @bswap_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v1i64:
|
; CHECK-LABEL: bswap_v1i64:
|
||||||
; CHECK: rev64 v0.8b, v0.8b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev64 v0.8b, v0.8b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op)
|
%res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op)
|
||||||
ret <1 x i64> %res
|
ret <1 x i64> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
|
define <2 x i64> @bswap_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v2i64:
|
; CHECK-LABEL: bswap_v2i64:
|
||||||
; CHECK: rev64 v0.16b, v0.16b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rev64 v0.16b, v0.16b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op)
|
%res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op)
|
||||||
ret <2 x i64> %res
|
ret <2 x i64> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v4i64(<4 x i64>* %a) #0 {
|
define void @bswap_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v4i64:
|
; CHECK-LABEL: bswap_v4i64:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.d, p0/m, z0.d
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <4 x i64>, <4 x i64>* %a
|
%op = load <4 x i64>, <4 x i64>* %a
|
||||||
%res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
|
%res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
|
||||||
store <4 x i64> %res, <4 x i64>* %a
|
store <4 x i64> %res, <4 x i64>* %a
|
||||||
|
@ -546,49 +592,53 @@ define void @bswap_v4i64(<4 x i64>* %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v8i64(<8 x i64>* %a) #0 {
|
define void @bswap_v8i64(<8 x i64>* %a) #0 {
|
||||||
; CHECK-LABEL: bswap_v8i64:
|
; VBITS_GE_256-LABEL: bswap_v8i64:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: revb z0.d, p0/m, z0.d
|
||||||
; Ensure sensible type legalisation.
|
; VBITS_GE_256-NEXT: revb z1.d, p0/m, z1.d
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
|
;
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
|
; VBITS_GE_512-LABEL: bswap_v8i64:
|
||||||
; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: revb z0.d, p0/m, z0.d
|
||||||
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%op = load <8 x i64>, <8 x i64>* %a
|
%op = load <8 x i64>, <8 x i64>* %a
|
||||||
%res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op)
|
%res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op)
|
||||||
store <8 x i64> %res, <8 x i64>* %a
|
store <8 x i64> %res, <8 x i64>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v16i64(<16 x i64>* %a) #0 {
|
define void @bswap_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v16i64:
|
; CHECK-LABEL: bswap_v16i64:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.d, p0/m, z0.d
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <16 x i64>, <16 x i64>* %a
|
%op = load <16 x i64>, <16 x i64>* %a
|
||||||
%res = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> %op)
|
%res = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> %op)
|
||||||
store <16 x i64> %res, <16 x i64>* %a
|
store <16 x i64> %res, <16 x i64>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bswap_v32i64(<32 x i64>* %a) #0 {
|
define void @bswap_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: bswap_v32i64:
|
; CHECK-LABEL: bswap_v32i64:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: revb [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
|
; CHECK-NEXT: revb z0.d, p0/m, z0.d
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%op = load <32 x i64>, <32 x i64>* %a
|
%op = load <32 x i64>, <32 x i64>* %a
|
||||||
%res = call <32 x i64> @llvm.bswap.v32i64(<32 x i64> %op)
|
%res = call <32 x i64> @llvm.bswap.v32i64(<32 x i64> %op)
|
||||||
store <32 x i64> %res, <32 x i64>* %a
|
store <32 x i64> %res, <32 x i64>* %a
|
||||||
|
@ -640,4 +690,3 @@ declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
|
||||||
declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
|
declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
|
||||||
declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
|
declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
|
||||||
declare <32 x i64> @llvm.bswap.v32i64(<32 x i64>)
|
declare <32 x i64> @llvm.bswap.v32i64(<32 x i64>)
|
||||||
|
|
||||||
|
|
|
@ -1,23 +1,11 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
|
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v8i8:
|
; CHECK-LABEL: sdiv_v8i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
|
@ -29,7 +17,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
|
||||||
ret <8 x i8> %res
|
ret <8 x i8> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
|
define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v16i8:
|
; CHECK-LABEL: sdiv_v16i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
|
@ -41,7 +29,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
|
||||||
ret <16 x i8> %res
|
ret <16 x i8> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v32i8(<32 x i8>* %a) #0 {
|
define void @sdiv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v32i8:
|
; CHECK-LABEL: sdiv_v32i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
|
@ -81,91 +69,35 @@ define void @sdiv_v64i8(<64 x i8>* %a) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v128i8(<128 x i8>* %a) #0 {
|
define void @sdiv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v128i8:
|
; CHECK-LABEL: sdiv_v128i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #96
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_256-NEXT: mov w9, #32
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov w10, #64
|
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0, x9]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0, x10]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.b, p0/m, z1.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.b, p0/m, z0.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.b, p0/m, z2.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.b, p0/m, z3.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0, x10]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0, x9]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z3.b }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: sdiv_v128i8:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.b, vl128
|
|
||||||
; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: asrd z0.b, p0/m, z0.b, #5
|
|
||||||
; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <128 x i8>, <128 x i8>* %a
|
%op1 = load <128 x i8>, <128 x i8>* %a
|
||||||
%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
|
%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
|
||||||
store <128 x i8> %res, <128 x i8>* %a
|
store <128 x i8> %res, <128 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v256i8(<256 x i8>* %a) #0 {
|
define void @sdiv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v256i8:
|
; CHECK-LABEL: sdiv_v256i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #192
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_256-NEXT: mov w9, #96
|
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov w10, #32
|
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||||
; VBITS_GE_256-NEXT: mov w11, #160
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov w12, #64
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov w13, #224
|
|
||||||
; VBITS_GE_256-NEXT: mov w14, #128
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x9]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0, x10]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0, x11]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x0, x12]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z4.b }, p0/z, [x0, x13]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z5.b }, p0/z, [x0, x14]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z6.b }, p0/z, [x0, x8]
|
|
||||||
; VBITS_GE_256-NEXT: ld1b { z7.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.b, p0/m, z1.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.b, p0/m, z0.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.b, p0/m, z3.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.b, p0/m, z2.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z5.b, p0/m, z5.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z4.b, p0/m, z4.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z6.b, p0/m, z6.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z7.b, p0/m, z7.b, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z6.b }, p0, [x0, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z4.b }, p0, [x0, x13]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z5.b }, p0, [x0, x14]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0, x11]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z3.b }, p0, [x0, x12]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x9]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0, x10]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z7.b }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: sdiv_v256i8:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl256
|
|
||||||
; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: asrd z0.b, p0/m, z0.b, #5
|
|
||||||
; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <256 x i8>, <256 x i8>* %a
|
%op1 = load <256 x i8>, <256 x i8>* %a
|
||||||
%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
|
%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
|
||||||
store <256 x i8> %res, <256 x i8>* %a
|
store <256 x i8> %res, <256 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
|
define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v4i16:
|
; CHECK-LABEL: sdiv_v4i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
|
@ -177,7 +109,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
|
||||||
ret <4 x i16> %res
|
ret <4 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
|
define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v8i16:
|
; CHECK-LABEL: sdiv_v8i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
|
@ -189,7 +121,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v16i16(<16 x i16>* %a) #0 {
|
define void @sdiv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v16i16:
|
; CHECK-LABEL: sdiv_v16i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -229,91 +161,35 @@ define void @sdiv_v32i16(<32 x i16>* %a) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v64i16(<64 x i16>* %a) #0 {
|
define void @sdiv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v64i16:
|
; CHECK-LABEL: sdiv_v64i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #32
|
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.h, p0/m, z1.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.h, p0/m, z0.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.h, p0/m, z2.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.h, p0/m, z3.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: sdiv_v64i16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: asrd z0.h, p0/m, z0.h, #5
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <64 x i16>, <64 x i16>* %a
|
%op1 = load <64 x i16>, <64 x i16>* %a
|
||||||
%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
|
%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
|
||||||
store <64 x i16> %res, <64 x i16>* %a
|
store <64 x i16> %res, <64 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v128i16(<128 x i16>* %a) #0 {
|
define void @sdiv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v128i16:
|
; CHECK-LABEL: sdiv_v128i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #96
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||||
; VBITS_GE_256-NEXT: mov x11, #80
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #112
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #64
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z6.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z7.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.h, p0/m, z1.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.h, p0/m, z0.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.h, p0/m, z3.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.h, p0/m, z2.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z5.h, p0/m, z5.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z4.h, p0/m, z4.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z6.h, p0/m, z6.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z7.h, p0/m, z7.h, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z6.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0, x13, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z5.h }, p0, [x0, x14, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z7.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: sdiv_v128i16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: asrd z0.h, p0/m, z0.h, #5
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <128 x i16>, <128 x i16>* %a
|
%op1 = load <128 x i16>, <128 x i16>* %a
|
||||||
%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
|
%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
|
||||||
store <128 x i16> %res, <128 x i16>* %a
|
store <128 x i16> %res, <128 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
|
define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v2i32:
|
; CHECK-LABEL: sdiv_v2i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
|
@ -325,7 +201,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
|
||||||
ret <2 x i32> %res
|
ret <2 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
|
define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v4i32:
|
; CHECK-LABEL: sdiv_v4i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
|
@ -337,7 +213,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
|
||||||
ret <4 x i32> %res
|
ret <4 x i32> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v8i32(<8 x i32>* %a) #0 {
|
define void @sdiv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v8i32:
|
; CHECK-LABEL: sdiv_v8i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -377,91 +253,35 @@ define void @sdiv_v16i32(<16 x i32>* %a) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v32i32(<32 x i32>* %a) #0 {
|
define void @sdiv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v32i32:
|
; CHECK-LABEL: sdiv_v32i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.s, p0/m, z1.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.s, p0/m, z0.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.s, p0/m, z2.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.s, p0/m, z3.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: sdiv_v32i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: asrd z0.s, p0/m, z0.s, #5
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <32 x i32>, <32 x i32>* %a
|
%op1 = load <32 x i32>, <32 x i32>* %a
|
||||||
%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
|
%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
|
||||||
store <32 x i32> %res, <32 x i32>* %a
|
store <32 x i32> %res, <32 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v64i32(<64 x i32>* %a) #0 {
|
define void @sdiv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v64i32:
|
; CHECK-LABEL: sdiv_v64i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||||
; VBITS_GE_256-NEXT: mov x11, #40
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #56
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #32
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.s, p0/m, z1.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.s, p0/m, z0.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.s, p0/m, z3.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.s, p0/m, z2.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z5.s, p0/m, z5.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z4.s, p0/m, z4.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z6.s, p0/m, z6.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z7.s, p0/m, z7.s, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: sdiv_v64i32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: asrd z0.s, p0/m, z0.s, #5
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <64 x i32>, <64 x i32>* %a
|
%op1 = load <64 x i32>, <64 x i32>* %a
|
||||||
%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
|
%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
|
||||||
store <64 x i32> %res, <64 x i32>* %a
|
store <64 x i32> %res, <64 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
|
define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v1i64:
|
; CHECK-LABEL: sdiv_v1i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
|
@ -474,7 +294,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Vector i64 sdiv are not legal for NEON so use SVE when available.
|
; Vector i64 sdiv are not legal for NEON so use SVE when available.
|
||||||
define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
|
define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v2i64:
|
; CHECK-LABEL: sdiv_v2i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||||
|
@ -486,7 +306,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
|
||||||
ret <2 x i64> %res
|
ret <2 x i64> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v4i64(<4 x i64>* %a) #0 {
|
define void @sdiv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: sdiv_v4i64:
|
; CHECK-LABEL: sdiv_v4i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -526,84 +346,28 @@ define void @sdiv_v8i64(<8 x i64>* %a) #0 {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v16i64(<16 x i64>* %a) #0 {
|
define void @sdiv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v16i64:
|
; CHECK-LABEL: sdiv_v16i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #4
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.d, p0/m, z1.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.d, p0/m, z0.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.d, p0/m, z2.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.d, p0/m, z3.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: sdiv_v16i64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: asrd z0.d, p0/m, z0.d, #5
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%op1 = load <16 x i64>, <16 x i64>* %a
|
%op1 = load <16 x i64>, <16 x i64>* %a
|
||||||
%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
|
%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
|
||||||
store <16 x i64> %res, <16 x i64>* %a
|
store <16 x i64> %res, <16 x i64>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @sdiv_v32i64(<32 x i64>* %a) #0 {
|
define void @sdiv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: sdiv_v32i64:
|
; CHECK-LABEL: sdiv_v32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #12
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||||
; VBITS_GE_256-NEXT: mov x11, #20
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x12, #8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x13, #28
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #16
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: asrd z1.d, p0/m, z1.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z0.d, p0/m, z0.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z3.d, p0/m, z3.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z2.d, p0/m, z2.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z5.d, p0/m, z5.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z4.d, p0/m, z4.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z6.d, p0/m, z6.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: asrd z7.d, p0/m, z7.d, #5
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: sdiv_v32i64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: asrd z0.d, p0/m, z0.d, #5
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%op1 = load <32 x i64>, <32 x i64>* %a
|
%op1 = load <32 x i64>, <32 x i64>* %a
|
||||||
%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
|
%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
|
||||||
store <32 x i64> %res, <32 x i64>* %a
|
store <32 x i64> %res, <32 x i64>* %a
|
||||||
|
|
|
@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
; bigger than NEON. However, having no support opens us up to a code generator
|
; bigger than NEON. However, having no support opens us up to a code generator
|
||||||
; hang when expanding BUILD_VECTOR. Here we just validate the promblematic case
|
; hang when expanding BUILD_VECTOR. Here we just validate the promblematic case
|
||||||
; successfully exits code generation.
|
; successfully exits code generation.
|
||||||
define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32> %b) #0 {
|
define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32> %b) vscale_range(2,2) #0 {
|
||||||
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
|
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
||||||
|
@ -37,8 +37,8 @@ define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Ensure we don't crash when trying to lower a shuffle via and extract
|
; Ensure we don't crash when trying to lower a shuffle via an extract
|
||||||
define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) #0 {
|
define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) vscale_range(2,2) #0 {
|
||||||
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
|
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
|
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
|
||||||
|
@ -132,4 +132,4 @@ exit:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { vscale_range(2,2) "target-features"="+sve" }
|
attributes #0 = { "target-features"="+sve" }
|
||||||
|
|
|
@ -1,21 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@ -24,7 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <8 x i8> @splat_v8i8(i8 %a) #0 {
|
define <8 x i8> @splat_v8i8(i8 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v8i8:
|
; CHECK-LABEL: splat_v8i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.8b, w0
|
; CHECK-NEXT: dup v0.8b, w0
|
||||||
|
@ -35,7 +21,7 @@ define <8 x i8> @splat_v8i8(i8 %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <16 x i8> @splat_v16i8(i8 %a) #0 {
|
define <16 x i8> @splat_v16i8(i8 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v16i8:
|
; CHECK-LABEL: splat_v16i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.16b, w0
|
; CHECK-NEXT: dup v0.16b, w0
|
||||||
|
@ -45,7 +31,7 @@ define <16 x i8> @splat_v16i8(i8 %a) #0 {
|
||||||
ret <16 x i8> %splat
|
ret <16 x i8> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
|
define void @splat_v32i8(i8 %a, <32 x i8>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v32i8:
|
; CHECK-LABEL: splat_v32i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
|
@ -74,68 +60,32 @@ define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.b, w0
|
; VBITS_GE_512-NEXT: mov z0.b, w0
|
||||||
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <64 x i8> undef, i8 %a, i64 0
|
%insert = insertelement <64 x i8> undef, i8 %a, i64 0
|
||||||
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x i8> %splat, <64 x i8>* %b
|
store <64 x i8> %splat, <64 x i8>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 {
|
define void @splat_v128i8(i8 %a, <128 x i8>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v128i8:
|
; CHECK-LABEL: splat_v128i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #96
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
; VBITS_GE_256-NEXT: mov w9, #64
|
; CHECK-NEXT: mov z0.b, w0
|
||||||
; VBITS_GE_256-NEXT: mov w10, #32
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.b, w0
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x9]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x10]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v128i8:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.b, vl128
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.b, w0
|
|
||||||
; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <128 x i8> undef, i8 %a, i64 0
|
%insert = insertelement <128 x i8> undef, i8 %a, i64 0
|
||||||
%splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
|
%splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
|
||||||
store <128 x i8> %splat, <128 x i8>* %b
|
store <128 x i8> %splat, <128 x i8>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
|
define void @splat_v256i8(i8 %a, <256 x i8>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v256i8:
|
; CHECK-LABEL: splat_v256i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #224
|
; CHECK-NEXT: ptrue p0.b, vl256
|
||||||
; VBITS_GE_256-NEXT: mov w9, #192
|
; CHECK-NEXT: mov z0.b, w0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov z0.b, w0
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov w10, #160
|
|
||||||
; VBITS_GE_256-NEXT: mov w11, #128
|
|
||||||
; VBITS_GE_256-NEXT: mov w12, #96
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8]
|
|
||||||
; VBITS_GE_256-NEXT: mov w8, #64
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x9]
|
|
||||||
; VBITS_GE_256-NEXT: mov w9, #32
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x10]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x11]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x12]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x9]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v256i8:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl256
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.b, w0
|
|
||||||
; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <256 x i8> undef, i8 %a, i64 0
|
%insert = insertelement <256 x i8> undef, i8 %a, i64 0
|
||||||
%splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
|
%splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
|
||||||
store <256 x i8> %splat, <256 x i8>* %b
|
store <256 x i8> %splat, <256 x i8>* %b
|
||||||
|
@ -143,7 +93,7 @@ define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x i16> @splat_v4i16(i16 %a) #0 {
|
define <4 x i16> @splat_v4i16(i16 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4i16:
|
; CHECK-LABEL: splat_v4i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.4h, w0
|
; CHECK-NEXT: dup v0.4h, w0
|
||||||
|
@ -154,7 +104,7 @@ define <4 x i16> @splat_v4i16(i16 %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x i16> @splat_v8i16(i16 %a) #0 {
|
define <8 x i16> @splat_v8i16(i16 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v8i16:
|
; CHECK-LABEL: splat_v8i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.8h, w0
|
; CHECK-NEXT: dup v0.8h, w0
|
||||||
|
@ -164,7 +114,7 @@ define <8 x i16> @splat_v8i16(i16 %a) #0 {
|
||||||
ret <8 x i16> %splat
|
ret <8 x i16> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
|
define void @splat_v16i16(i16 %a, <16 x i16>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v16i16:
|
; CHECK-LABEL: splat_v16i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -193,68 +143,32 @@ define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.h, w0
|
; VBITS_GE_512-NEXT: mov z0.h, w0
|
||||||
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <32 x i16> undef, i16 %a, i64 0
|
%insert = insertelement <32 x i16> undef, i16 %a, i64 0
|
||||||
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x i16> %splat, <32 x i16>* %b
|
store <32 x i16> %splat, <32 x i16>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 {
|
define void @splat_v64i16(i16 %a, <64 x i16>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v64i16:
|
; CHECK-LABEL: splat_v64i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #32
|
; CHECK-NEXT: mov z0.h, w0
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, w0
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v64i16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.h, w0
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <64 x i16> undef, i16 %a, i64 0
|
%insert = insertelement <64 x i16> undef, i16 %a, i64 0
|
||||||
%splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x i16> %splat, <64 x i16>* %b
|
store <64 x i16> %splat, <64 x i16>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
|
define void @splat_v128i16(i16 %a, <128 x i16>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v128i16:
|
; CHECK-LABEL: splat_v128i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #112
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: mov x9, #96
|
; CHECK-NEXT: mov z0.h, w0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, w0
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x10, #80
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #64
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #48
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v128i16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.h, w0
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <128 x i16> undef, i16 %a, i64 0
|
%insert = insertelement <128 x i16> undef, i16 %a, i64 0
|
||||||
%splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
|
%splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
|
||||||
store <128 x i16> %splat, <128 x i16>* %b
|
store <128 x i16> %splat, <128 x i16>* %b
|
||||||
|
@ -262,7 +176,7 @@ define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x i32> @splat_v2i32(i32 %a) #0 {
|
define <2 x i32> @splat_v2i32(i32 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v2i32:
|
; CHECK-LABEL: splat_v2i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.2s, w0
|
; CHECK-NEXT: dup v0.2s, w0
|
||||||
|
@ -273,7 +187,7 @@ define <2 x i32> @splat_v2i32(i32 %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x i32> @splat_v4i32(i32 %a) #0 {
|
define <4 x i32> @splat_v4i32(i32 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4i32:
|
; CHECK-LABEL: splat_v4i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.4s, w0
|
; CHECK-NEXT: dup v0.4s, w0
|
||||||
|
@ -283,7 +197,7 @@ define <4 x i32> @splat_v4i32(i32 %a) #0 {
|
||||||
ret <4 x i32> %splat
|
ret <4 x i32> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
|
define void @splat_v8i32(i32 %a, <8 x i32>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v8i32:
|
; CHECK-LABEL: splat_v8i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -312,68 +226,32 @@ define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.s, w0
|
; VBITS_GE_512-NEXT: mov z0.s, w0
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <16 x i32> undef, i32 %a, i64 0
|
%insert = insertelement <16 x i32> undef, i32 %a, i64 0
|
||||||
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x i32> %splat, <16 x i32>* %b
|
store <16 x i32> %splat, <16 x i32>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 {
|
define void @splat_v32i32(i32 %a, <32 x i32>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v32i32:
|
; CHECK-LABEL: splat_v32i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: mov z0.s, w0
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, w0
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v32i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.s, w0
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <32 x i32> undef, i32 %a, i64 0
|
%insert = insertelement <32 x i32> undef, i32 %a, i64 0
|
||||||
%splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x i32> %splat, <32 x i32>* %b
|
store <32 x i32> %splat, <32 x i32>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
|
define void @splat_v64i32(i32 %a, <64 x i32>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v64i32:
|
; CHECK-LABEL: splat_v64i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #56
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: mov z0.s, w0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, w0
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x10, #40
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #32
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v64i32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.s, w0
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <64 x i32> undef, i32 %a, i64 0
|
%insert = insertelement <64 x i32> undef, i32 %a, i64 0
|
||||||
%splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x i32> %splat, <64 x i32>* %b
|
store <64 x i32> %splat, <64 x i32>* %b
|
||||||
|
@ -381,7 +259,7 @@ define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x i64> @splat_v1i64(i64 %a) #0 {
|
define <1 x i64> @splat_v1i64(i64 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v1i64:
|
; CHECK-LABEL: splat_v1i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: fmov d0, x0
|
; CHECK-NEXT: fmov d0, x0
|
||||||
|
@ -392,7 +270,7 @@ define <1 x i64> @splat_v1i64(i64 %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x i64> @splat_v2i64(i64 %a) #0 {
|
define <2 x i64> @splat_v2i64(i64 %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v2i64:
|
; CHECK-LABEL: splat_v2i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: dup v0.2d, x0
|
; CHECK-NEXT: dup v0.2d, x0
|
||||||
|
@ -402,7 +280,7 @@ define <2 x i64> @splat_v2i64(i64 %a) #0 {
|
||||||
ret <2 x i64> %splat
|
ret <2 x i64> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
|
define void @splat_v4i64(i64 %a, <4 x i64>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4i64:
|
; CHECK-LABEL: splat_v4i64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
|
@ -431,68 +309,32 @@ define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.d, x0
|
; VBITS_GE_512-NEXT: mov z0.d, x0
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <8 x i64> undef, i64 %a, i64 0
|
%insert = insertelement <8 x i64> undef, i64 %a, i64 0
|
||||||
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
|
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||||
store <8 x i64> %splat, <8 x i64>* %b
|
store <8 x i64> %splat, <8 x i64>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 {
|
define void @splat_v16i64(i64 %a, <16 x i64>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v16i64:
|
; CHECK-LABEL: splat_v16i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: mov z0.d, x0
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, x0
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v16i64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.d, x0
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <16 x i64> undef, i64 %a, i64 0
|
%insert = insertelement <16 x i64> undef, i64 %a, i64 0
|
||||||
%splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x i64> %splat, <16 x i64>* %b
|
store <16 x i64> %splat, <16 x i64>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
|
define void @splat_v32i64(i64 %a, <32 x i64>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v32i64:
|
; CHECK-LABEL: splat_v32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #28
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: mov z0.d, x0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, x0
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x10, #20
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #4
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v32i64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.d, x0
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <32 x i64> undef, i64 %a, i64 0
|
%insert = insertelement <32 x i64> undef, i64 %a, i64 0
|
||||||
%splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x i64> %splat, <32 x i64>* %b
|
store <32 x i64> %splat, <32 x i64>* %b
|
||||||
|
@ -504,7 +346,7 @@ define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <4 x half> @splat_v4f16(half %a) #0 {
|
define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4f16:
|
; CHECK-LABEL: splat_v4f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
|
||||||
|
@ -516,7 +358,7 @@ define <4 x half> @splat_v4f16(half %a) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <8 x half> @splat_v8f16(half %a) #0 {
|
define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v8f16:
|
; CHECK-LABEL: splat_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
|
||||||
|
@ -527,7 +369,7 @@ define <8 x half> @splat_v8f16(half %a) #0 {
|
||||||
ret <8 x half> %splat
|
ret <8 x half> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
|
define void @splat_v16f16(half %a, <16 x half>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v16f16:
|
; CHECK-LABEL: splat_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
||||||
|
@ -559,72 +401,34 @@ define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.h, h0
|
; VBITS_GE_512-NEXT: mov z0.h, h0
|
||||||
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <32 x half> undef, half %a, i64 0
|
%insert = insertelement <32 x half> undef, half %a, i64 0
|
||||||
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x half> %splat, <32 x half>* %b
|
store <32 x half> %splat, <32 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v64f16(half %a, <64 x half>* %b) #0 {
|
define void @splat_v64f16(half %a, <64 x half>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v64f16:
|
; CHECK-LABEL: splat_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #32
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: mov z0.h, h0
|
||||||
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, h0
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.h, h0
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <64 x half> undef, half %a, i64 0
|
%insert = insertelement <64 x half> undef, half %a, i64 0
|
||||||
%splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x half> %splat, <64 x half>* %b
|
store <64 x half> %splat, <64 x half>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
|
define void @splat_v128f16(half %a, <128 x half>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v128f16:
|
; CHECK-LABEL: splat_v128f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #112
|
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #96
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0
|
; CHECK-NEXT: mov z0.h, h0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #80
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, h0
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #64
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #48
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #32
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x11, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x12, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v128f16:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: // kill: def $h0 killed $h0 def $z0
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.h, h0
|
|
||||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <128 x half> undef, half %a, i64 0
|
%insert = insertelement <128 x half> undef, half %a, i64 0
|
||||||
%splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
|
%splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
|
||||||
store <128 x half> %splat, <128 x half>* %b
|
store <128 x half> %splat, <128 x half>* %b
|
||||||
|
@ -632,7 +436,7 @@ define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
|
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v2f32:
|
; CHECK-LABEL: splat_v2f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||||
|
@ -644,7 +448,7 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
|
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4f32:
|
; CHECK-LABEL: splat_v4f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
|
||||||
|
@ -655,7 +459,7 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
|
||||||
ret <4 x float> %splat
|
ret <4 x float> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
|
define void @splat_v8f32(float %a, <8 x float>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v8f32:
|
; CHECK-LABEL: splat_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
||||||
|
@ -687,72 +491,34 @@ define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.s, s0
|
; VBITS_GE_512-NEXT: mov z0.s, s0
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <16 x float> undef, float %a, i64 0
|
%insert = insertelement <16 x float> undef, float %a, i64 0
|
||||||
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x float> %splat, <16 x float>* %b
|
store <16 x float> %splat, <16 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v32f32(float %a, <32 x float>* %b) #0 {
|
define void @splat_v32f32(float %a, <32 x float>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v32f32:
|
; CHECK-LABEL: splat_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: mov z0.s, s0
|
||||||
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, s0
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: // kill: def $s0 killed $s0 def $z0
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.s, s0
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <32 x float> undef, float %a, i64 0
|
%insert = insertelement <32 x float> undef, float %a, i64 0
|
||||||
%splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x float> %splat, <32 x float>* %b
|
store <32 x float> %splat, <32 x float>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
|
define void @splat_v64f32(float %a, <64 x float>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v64f32:
|
; CHECK-LABEL: splat_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #56
|
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0
|
; CHECK-NEXT: mov z0.s, s0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #40
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, s0
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #32
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: // kill: def $s0 killed $s0 def $z0
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.s, s0
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <64 x float> undef, float %a, i64 0
|
%insert = insertelement <64 x float> undef, float %a, i64 0
|
||||||
%splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x float> %splat, <64 x float>* %b
|
store <64 x float> %splat, <64 x float>* %b
|
||||||
|
@ -760,7 +526,7 @@ define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
|
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v1f64:
|
; CHECK-LABEL: splat_v1f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
@ -770,7 +536,7 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
|
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v2f64:
|
; CHECK-LABEL: splat_v2f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||||
|
@ -781,7 +547,7 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
|
||||||
ret <2 x double> %splat
|
ret <2 x double> %splat
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
|
define void @splat_v4f64(double %a, <4 x double>* %b) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: splat_v4f64:
|
; CHECK-LABEL: splat_v4f64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
|
@ -813,72 +579,34 @@ define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
|
||||||
; VBITS_GE_512-NEXT: mov z0.d, d0
|
; VBITS_GE_512-NEXT: mov z0.d, d0
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
%insert = insertelement <8 x double> undef, double %a, i64 0
|
%insert = insertelement <8 x double> undef, double %a, i64 0
|
||||||
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
|
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
|
||||||
store <8 x double> %splat, <8 x double>* %b
|
store <8 x double> %splat, <8 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v16f64(double %a, <16 x double>* %b) #0 {
|
define void @splat_v16f64(double %a, <16 x double>* %b) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v16f64:
|
; CHECK-LABEL: splat_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: mov z0.d, d0
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, d0
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: splat_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: mov z0.d, d0
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%insert = insertelement <16 x double> undef, double %a, i64 0
|
%insert = insertelement <16 x double> undef, double %a, i64 0
|
||||||
%splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x double> %splat, <16 x double>* %b
|
store <16 x double> %splat, <16 x double>* %b
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
|
define void @splat_v32f64(double %a, <32 x double>* %b) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_v32f64:
|
; CHECK-LABEL: splat_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #28
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0
|
; CHECK-NEXT: mov z0.d, d0
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #20
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, d0
|
|
||||||
; VBITS_GE_256-NEXT: mov x11, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: mov x9, #4
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: splat_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: mov z0.d, d0
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%insert = insertelement <32 x double> undef, double %a, i64 0
|
%insert = insertelement <32 x double> undef, double %a, i64 0
|
||||||
%splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x double> %splat, <32 x double>* %b
|
store <32 x double> %splat, <32 x double>* %b
|
||||||
|
@ -889,88 +617,52 @@ define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
|
||||||
; DUP (integer immediate)
|
; DUP (integer immediate)
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @splat_imm_v64i8(<64 x i8>* %a) #0 {
|
define void @splat_imm_v64i8(<64 x i8>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v64i8:
|
; CHECK-LABEL: splat_imm_v64i8:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov w8, #32
|
; CHECK-NEXT: mov z0.b, #1 // =0x1
|
||||||
; VBITS_GE_256-NEXT: mov z0.b, #1 // =0x1
|
; CHECK-NEXT: ptrue p0.b, vl64
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v64i8:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: mov z0.b, #1 // =0x1
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
|
|
||||||
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <64 x i8> undef, i8 1, i64 0
|
%insert = insertelement <64 x i8> undef, i8 1, i64 0
|
||||||
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
|
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||||
store <64 x i8> %splat, <64 x i8>* %a
|
store <64 x i8> %splat, <64 x i8>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_imm_v32i16(<32 x i16>* %a) #0 {
|
define void @splat_imm_v32i16(<32 x i16>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v32i16:
|
; CHECK-LABEL: splat_imm_v32i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: mov z0.h, #2 // =0x2
|
||||||
; VBITS_GE_256-NEXT: mov z0.h, #2 // =0x2
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v32i16:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: mov z0.h, #2 // =0x2
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
|
||||||
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <32 x i16> undef, i16 2, i64 0
|
%insert = insertelement <32 x i16> undef, i16 2, i64 0
|
||||||
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x i16> %splat, <32 x i16>* %a
|
store <32 x i16> %splat, <32 x i16>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_imm_v16i32(<16 x i32>* %a) #0 {
|
define void @splat_imm_v16i32(<16 x i32>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v16i32:
|
; CHECK-LABEL: splat_imm_v16i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: mov z0.s, #3 // =0x3
|
||||||
; VBITS_GE_256-NEXT: mov z0.s, #3 // =0x3
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v16i32:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: mov z0.s, #3 // =0x3
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <16 x i32> undef, i32 3, i64 0
|
%insert = insertelement <16 x i32> undef, i32 3, i64 0
|
||||||
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x i32> %splat, <16 x i32>* %a
|
store <16 x i32> %splat, <16 x i32>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
|
define void @splat_imm_v8i64(<8 x i64>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v8i64:
|
; CHECK-LABEL: splat_imm_v8i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: mov z0.d, #4 // =0x4
|
||||||
; VBITS_GE_256-NEXT: mov z0.d, #4 // =0x4
|
; CHECK-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v8i64:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: mov z0.d, #4 // =0x4
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <8 x i64> undef, i64 4, i64 0
|
%insert = insertelement <8 x i64> undef, i64 4, i64 0
|
||||||
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
|
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||||
store <8 x i64> %splat, <8 x i64>* %a
|
store <8 x i64> %splat, <8 x i64>* %a
|
||||||
|
@ -981,69 +673,43 @@ define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
|
||||||
; DUP (floating-point immediate)
|
; DUP (floating-point immediate)
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @splat_imm_v32f16(<32 x half>* %a) #0 {
|
define void @splat_imm_v32f16(<32 x half>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v32f16:
|
; CHECK-LABEL: splat_imm_v32f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #16
|
; CHECK-NEXT: fmov z0.h, #5.00000000
|
||||||
; VBITS_GE_256-NEXT: fmov z0.h, #5.00000000
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v32f16:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: fmov z0.h, #5.00000000
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
|
||||||
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <32 x half> undef, half 5.0, i64 0
|
%insert = insertelement <32 x half> undef, half 5.0, i64 0
|
||||||
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
|
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
|
||||||
store <32 x half> %splat, <32 x half>* %a
|
store <32 x half> %splat, <32 x half>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_imm_v16f32(<16 x float>* %a) #0 {
|
define void @splat_imm_v16f32(<16 x float>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v16f32:
|
; CHECK-LABEL: splat_imm_v16f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #8
|
; CHECK-NEXT: fmov z0.s, #6.00000000
|
||||||
; VBITS_GE_256-NEXT: fmov z0.s, #6.00000000
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v16f32:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: fmov z0.s, #6.00000000
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
||||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <16 x float> undef, float 6.0, i64 0
|
%insert = insertelement <16 x float> undef, float 6.0, i64 0
|
||||||
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
|
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
|
||||||
store <16 x float> %splat, <16 x float>* %a
|
store <16 x float> %splat, <16 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @splat_imm_v8f64(<8 x double>* %a) #0 {
|
define void @splat_imm_v8f64(<8 x double>* %a) vscale_range(4,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: splat_imm_v8f64:
|
; CHECK-LABEL: splat_imm_v8f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: fmov z0.d, #7.00000000
|
||||||
; VBITS_GE_256-NEXT: fmov z0.d, #7.00000000
|
; CHECK-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: splat_imm_v8f64:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: fmov z0.d, #7.00000000
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%insert = insertelement <8 x double> undef, double 7.0, i64 0
|
%insert = insertelement <8 x double> undef, double 7.0, i64 0
|
||||||
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
|
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
|
||||||
store <8 x double> %splat, <8 x double>* %a
|
store <8 x double> %splat, <8 x double>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+sve" }
|
attributes #0 = { "target-features"="+sve" }
|
||||||
|
|
|
@ -1,35 +1,29 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
|
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
|
||||||
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048
|
||||||
; VBYTES represents the useful byte size of a vector register from the code
|
|
||||||
; generator's point of view. It is clamped to power-of-2 values because
|
|
||||||
; only power-of-2 vector lengths are considered legal, regardless of the
|
|
||||||
; user specified vector length.
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: ptrue
|
|
||||||
|
|
||||||
; Don't use SVE for 64-bit vectors.
|
; Don't use SVE for 64-bit vectors.
|
||||||
define void @store_v2f32(<2 x float>* %a) #0 {
|
define void @store_v2f32(<2 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v2f32:
|
; CHECK-LABEL: store_v2f32:
|
||||||
; CHECK: str xzr, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: ret
|
; CHECK-NEXT: str xzr, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
store <2 x float> zeroinitializer, <2 x float>* %a
|
store <2 x float> zeroinitializer, <2 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -37,66 +31,148 @@ define void @store_v2f32(<2 x float>* %a) #0 {
|
||||||
; Don't use SVE for 128-bit vectors.
|
; Don't use SVE for 128-bit vectors.
|
||||||
define void @store_v4f32(<4 x float>* %a) #0 {
|
define void @store_v4f32(<4 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v4f32:
|
; CHECK-LABEL: store_v4f32:
|
||||||
; CHECK: stp xzr, xzr, [x0]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: ret
|
; CHECK-NEXT: stp xzr, xzr, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
store <4 x float> zeroinitializer, <4 x float>* %a
|
store <4 x float> zeroinitializer, <4 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_v8f32(<8 x float>* %a) #0 {
|
define void @store_v8f32(<8 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v8f32:
|
; CHECK-LABEL: store_v8f32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
|
; CHECK: // %bb.0:
|
||||||
; CHECK: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK: ret
|
; CHECK-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
store <8 x float> zeroinitializer, <8 x float>* %a
|
store <8 x float> zeroinitializer, <8 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_v16f32(<16 x float>* %a) #0 {
|
define void @store_v16f32(<16 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v16f32:
|
; VBITS_GE_256-LABEL: store_v16f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: store_v16f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: store_v16f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_1024-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: store_v16f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
store <16 x float> zeroinitializer, <16 x float>* %a
|
store <16 x float> zeroinitializer, <16 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_v32f32(<32 x float>* %a) #0 {
|
define void @store_v32f32(<32 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v32f32:
|
; VBITS_GE_256-LABEL: store_v32f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: mov x8, #24
|
||||||
; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: mov x9, #16
|
||||||
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x10, #8
|
||||||
; VBITS_LE_256-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2]
|
; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0
|
||||||
; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: store_v32f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: mov x8, #16
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: store_v32f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_1024-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: store_v32f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
store <32 x float> zeroinitializer, <32 x float>* %a
|
store <32 x float> zeroinitializer, <32 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_v64f32(<64 x float>* %a) #0 {
|
define void @store_v64f32(<64 x float>* %a) #0 {
|
||||||
; CHECK-LABEL: store_v64f32:
|
; VBITS_GE_256-LABEL: store_v64f32:
|
||||||
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
|
; VBITS_GE_256: // %bb.0:
|
||||||
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
|
; VBITS_GE_256-NEXT: mov x8, #56
|
||||||
; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]]
|
; VBITS_GE_256-NEXT: mov x9, #48
|
||||||
; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_LE_512-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]]
|
; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0
|
||||||
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x10, #40
|
||||||
; VBITS_LE_512-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]]
|
; VBITS_GE_256-NEXT: mov x11, #32
|
||||||
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]]
|
; VBITS_GE_256-NEXT: mov x8, #24
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A4]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x12, #16
|
||||||
; VBITS_LE_256-DAG: mov x[[A5:[0-9]+]], #[[#mul(div(VBYTES,4),5)]]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A5]], lsl #2]
|
; VBITS_GE_256-NEXT: mov x9, #8
|
||||||
; VBITS_LE_256-DAG: mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2]
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A6]], lsl #2]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2]
|
||||||
; VBITS_LE_256-DAG: mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A7]], lsl #2]
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x12, lsl #2]
|
||||||
; CHECK: ret
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: store_v64f32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: mov x8, #48
|
||||||
|
; VBITS_GE_512-NEXT: mov x9, #32
|
||||||
|
; VBITS_GE_512-NEXT: mov x10, #16
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_1024-LABEL: store_v64f32:
|
||||||
|
; VBITS_GE_1024: // %bb.0:
|
||||||
|
; VBITS_GE_1024-NEXT: mov x8, #32
|
||||||
|
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||||
|
; VBITS_GE_1024-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_1024-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_2048-LABEL: store_v64f32:
|
||||||
|
; VBITS_GE_2048: // %bb.0:
|
||||||
|
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
||||||
|
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
|
||||||
|
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
||||||
|
; VBITS_GE_2048-NEXT: ret
|
||||||
store <64 x float> zeroinitializer, <64 x float>* %a
|
store <64 x float> zeroinitializer, <64 x float>* %a
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -aarch64-enable-atomic-cfg-tidy=false < %s | not grep ptrue
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
; Test we can code generate patterns of the form:
|
; Test we can code generate patterns of the form:
|
||||||
; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
|
; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
|
||||||
|
@ -28,7 +14,7 @@
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
define void @subvector_v8i16(<8 x i16> *%in, <8 x i16>* %out) #0 {
|
define void @subvector_v8i16(<8 x i16> *%in, <8 x i16>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v8i16:
|
; CHECK-LABEL: subvector_v8i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x0]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
@ -42,7 +28,7 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v16i16(<16 x i16> *%in, <16 x i16>* %out) #0 {
|
define void @subvector_v16i16(<16 x i16> *%in, <16 x i16>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v16i16:
|
; CHECK-LABEL: subvector_v16i16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -82,29 +68,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v64i16(<64 x i16> *%in, <64 x i16>* %out) #0 {
|
define void @subvector_v64i16(<64 x i16> *%in, <64 x i16>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v64i16:
|
; CHECK-LABEL: subvector_v64i16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #32
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v64i16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <64 x i16>, <64 x i16>* %in
|
%a = load <64 x i16>, <64 x i16>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -113,7 +83,7 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) #0 {
|
define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v8i32:
|
; CHECK-LABEL: subvector_v8i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -153,29 +123,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) #0 {
|
define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v32i32:
|
; CHECK-LABEL: subvector_v32i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v32i32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <32 x i32>, <32 x i32>* %in
|
%a = load <32 x i32>, <32 x i32>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -184,41 +138,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) #0 {
|
define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v64i32:
|
; CHECK-LABEL: subvector_v64i32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #56
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #40
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #8
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: subvector_v64i32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%a = load <64 x i32>, <64 x i32>* %in
|
%a = load <64 x i32>, <64 x i32>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -228,23 +154,16 @@ bb1:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define void @subvector_v8i64(<8 x i64> *%in, <8 x i64>* %out) #0 {
|
define void @subvector_v8i64(<8 x i64> *%in, <8 x i64>* %out) vscale_range(2,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v8i64:
|
; CHECK-LABEL: subvector_v8i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #4
|
; CHECK-NEXT: mov x8, #4
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
;
|
|
||||||
; VBITS_GE_512-LABEL: subvector_v8i64:
|
|
||||||
; VBITS_GE_512: // %bb.0:
|
|
||||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
||||||
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
%a = load <8 x i64>, <8 x i64>* %in
|
%a = load <8 x i64>, <8 x i64>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -253,29 +172,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v16i64(<16 x i64> *%in, <16 x i64>* %out) #0 {
|
define void @subvector_v16i64(<16 x i64> *%in, <16 x i64>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v16i64:
|
; CHECK-LABEL: subvector_v16i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v16i64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <16 x i64>, <16 x i64>* %in
|
%a = load <16 x i64>, <16 x i64>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -284,41 +187,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v32i64(<32 x i64> *%in, <32 x i64>* %out) #0 {
|
define void @subvector_v32i64(<32 x i64> *%in, <32 x i64>* %out) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v32i64:
|
; CHECK-LABEL: subvector_v32i64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #28
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #20
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #4
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: subvector_v32i64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%a = load <32 x i64>, <32 x i64>* %in
|
%a = load <32 x i64>, <32 x i64>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -327,7 +202,7 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v8f16(<8 x half> *%in, <8 x half>* %out) #0 {
|
define void @subvector_v8f16(<8 x half> *%in, <8 x half>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v8f16:
|
; CHECK-LABEL: subvector_v8f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldr q0, [x0]
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
|
@ -341,7 +216,7 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v16f16(<16 x half> *%in, <16 x half>* %out) #0 {
|
define void @subvector_v16f16(<16 x half> *%in, <16 x half>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v16f16:
|
; CHECK-LABEL: subvector_v16f16:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
|
@ -381,29 +256,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v64f16(<64 x half> *%in, <64 x half>* %out) #0 {
|
define void @subvector_v64f16(<64 x half> *%in, <64 x half>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v64f16:
|
; CHECK-LABEL: subvector_v64f16:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #48
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #32
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #16
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x1, x9, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x1, x10, lsl #1]
|
|
||||||
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v64f16:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
|
||||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <64 x half>, <64 x half>* %in
|
%a = load <64 x half>, <64 x half>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -412,7 +271,7 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v8f32(<8 x float> *%in, <8 x float>* %out) #0 {
|
define void @subvector_v8f32(<8 x float> *%in, <8 x float>* %out) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: subvector_v8f32:
|
; CHECK-LABEL: subvector_v8f32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
|
@ -452,29 +311,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v32f32(<32 x float> *%in, <32 x float>* %out) #0 {
|
define void @subvector_v32f32(<32 x float> *%in, <32 x float>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v32f32:
|
; CHECK-LABEL: subvector_v32f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #24
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #16
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #8
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v32f32:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
|
||||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <32 x float>, <32 x float>* %in
|
%a = load <32 x float>, <32 x float>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -483,41 +326,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v64f32(<64 x float> *%in, <64 x float>* %out) #0 {
|
define void @subvector_v64f32(<64 x float> *%in, <64 x float>* %out) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v64f32:
|
; CHECK-LABEL: subvector_v64f32:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #56
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_256-NEXT: mov x9, #48
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #40
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #32
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #24
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #16
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #8
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x9, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x1, x11, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x1, x10, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x1, x13, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x1, x12, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x1, x14, lsl #2]
|
|
||||||
; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: subvector_v64f32:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
|
||||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%a = load <64 x float>, <64 x float>* %in
|
%a = load <64 x float>, <64 x float>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -550,29 +365,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v16f64(<16 x double> *%in, <16 x double>* %out) #0 {
|
define void @subvector_v16f64(<16 x double> *%in, <16 x double>* %out) vscale_range(8,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v16f64:
|
; CHECK-LABEL: subvector_v16f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #12
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_256-NEXT: mov x9, #8
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #4
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_1024-LABEL: subvector_v16f64:
|
|
||||||
; VBITS_GE_1024: // %bb.0:
|
|
||||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
|
||||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_1024-NEXT: ret
|
|
||||||
%a = load <16 x double>, <16 x double>* %in
|
%a = load <16 x double>, <16 x double>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
@ -581,41 +380,13 @@ bb1:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @subvector_v32f64(<32 x double> *%in, <32 x double>* %out) #0 {
|
define void @subvector_v32f64(<32 x double> *%in, <32 x double>* %out) vscale_range(16,0) #0 {
|
||||||
; VBITS_GE_256-LABEL: subvector_v32f64:
|
; CHECK-LABEL: subvector_v32f64:
|
||||||
; VBITS_GE_256: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_256-NEXT: mov x8, #28
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_256-NEXT: mov x9, #24
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_256-NEXT: mov x10, #20
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||||
; VBITS_GE_256-NEXT: mov x11, #16
|
; CHECK-NEXT: ret
|
||||||
; VBITS_GE_256-NEXT: mov x12, #12
|
|
||||||
; VBITS_GE_256-NEXT: mov x13, #8
|
|
||||||
; VBITS_GE_256-NEXT: mov x14, #4
|
|
||||||
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x0, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x0, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z6.d }, p0/z, [x0, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z7.d }, p0/z, [x0, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x9, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x1, x11, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x1, x10, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z6.d }, p0, [x1, x13, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z5.d }, p0, [x1, x12, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z7.d }, p0, [x1, x14, lsl #3]
|
|
||||||
; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x1]
|
|
||||||
; VBITS_GE_256-NEXT: ret
|
|
||||||
;
|
|
||||||
; VBITS_GE_2048-LABEL: subvector_v32f64:
|
|
||||||
; VBITS_GE_2048: // %bb.0:
|
|
||||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
|
||||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x1]
|
|
||||||
; VBITS_GE_2048-NEXT: ret
|
|
||||||
%a = load <32 x double>, <32 x double>* %in
|
%a = load <32 x double>, <32 x double>* %in
|
||||||
br label %bb1
|
br label %bb1
|
||||||
|
|
||||||
|
|
|
@ -1,43 +1,30 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) vscale_range(2,0) #0 {
|
||||||
; NO_SVE-NOT: ptrue
|
; CHECK-LABEL: store_trunc_v2i64i8:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) #0 {
|
; CHECK-NEXT: ldr q0, [x0]
|
||||||
; CHECK-LABEL: store_trunc_v2i64i8
|
; CHECK-NEXT: ptrue p0.d, vl2
|
||||||
; CHECK: ldr q[[Q0:[0-9]+]], [x0]
|
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
|
||||||
; CHECK: ptrue p[[P0:[0-9]+]].d, vl2
|
; CHECK-NEXT: ret
|
||||||
; CHECK-NEXT: st1b { z[[Q0]].d }, p[[P0]], [x1]
|
|
||||||
; CHECK-NEXT: ret
|
|
||||||
%a = load <2 x i64>, <2 x i64>* %ap
|
%a = load <2 x i64>, <2 x i64>* %ap
|
||||||
%val = trunc <2 x i64> %a to <2 x i8>
|
%val = trunc <2 x i64> %a to <2 x i8>
|
||||||
store <2 x i8> %val, <2 x i8>* %dest
|
store <2 x i8> %val, <2 x i8>* %dest
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) #0 {
|
define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v4i64i8
|
; CHECK-LABEL: store_trunc_v4i64i8:
|
||||||
; CHECK: ptrue p[[P0:[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: st1b { z[[Q0]].d }, p[[P0]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <4 x i64>, <4 x i64>* %ap
|
%a = load <4 x i64>, <4 x i64>* %ap
|
||||||
%val = trunc <4 x i64> %a to <4 x i8>
|
%val = trunc <4 x i64> %a to <4 x i8>
|
||||||
store <4 x i8> %val, <4 x i8>* %dest
|
store <4 x i8> %val, <4 x i8>* %dest
|
||||||
|
@ -45,48 +32,52 @@ define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
|
define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v8i64i8:
|
; VBITS_GE_256-LABEL: store_trunc_v8i64i8:
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].d }, p[[P0]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4
|
; VBITS_GE_256-NEXT: st1b { z1.s }, p0, [x1]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
|
;
|
||||||
; VBITS_EQ_256-DAG: splice [[WORDS:z[0-9]+]].s, [[PG2]], [[WORDS_LO]].s, [[WORDS_HI]].s
|
; VBITS_GE_512-LABEL: store_trunc_v8i64i8:
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl8
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: st1b { [[WORDS]].s }, [[PG3]], [x1]
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %ap
|
%a = load <8 x i64>, <8 x i64>* %ap
|
||||||
%val = trunc <8 x i64> %a to <8 x i8>
|
%val = trunc <8 x i64> %a to <8 x i8>
|
||||||
store <8 x i8> %val, <8 x i8>* %dest
|
store <8 x i8> %val, <8 x i8>* %dest
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) #0 {
|
define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v16i64i8:
|
; CHECK-LABEL: store_trunc_v16i64i8:
|
||||||
; VBITS_GE_1024: ptrue p[[P0:[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: st1b { [[Z0]].d }, p[[P0]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <16 x i64>, <16 x i64>* %ap
|
%a = load <16 x i64>, <16 x i64>* %ap
|
||||||
%val = trunc <16 x i64> %a to <16 x i8>
|
%val = trunc <16 x i64> %a to <16 x i8>
|
||||||
store <16 x i8> %val, <16 x i8>* %dest
|
store <16 x i8> %val, <16 x i8>* %dest
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) #0 {
|
define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v32i64i8:
|
; CHECK-LABEL: store_trunc_v32i64i8:
|
||||||
; VBITS_GE_2048: ptrue p[[P0:[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048-NEXT: st1b { [[Z0]].d }, p[[P0]], [x1]
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048-NEXT: ret
|
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i64>, <32 x i64>* %ap
|
%a = load <32 x i64>, <32 x i64>* %ap
|
||||||
%val = trunc <32 x i64> %a to <32 x i8>
|
%val = trunc <32 x i64> %a to <32 x i8>
|
||||||
store <32 x i8> %val, <32 x i8>* %dest
|
store <32 x i8> %val, <32 x i8>* %dest
|
||||||
|
@ -94,25 +85,27 @@ define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
|
define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v8i64i16:
|
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: st1h { [[Z0]].d }, p[[P0]], [x1]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
; Currently does not use the truncating store
|
; Currently does not use the truncating store
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
; VBITS_GE_256-LABEL: store_trunc_v8i64i16:
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: uzp1 z[[HALFS_LO:[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-DAG: uzp1 z[[HALFS_HI:[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-NEXT: mov v[[HALFS_LO]].d[1], v[[HALFS_HI]].d[0]
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_EQ_256-NEXT: str q[[HALFS_LO]], [x1]
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
|
||||||
|
; VBITS_GE_256-NEXT: str q1, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: store_trunc_v8i64i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %ap
|
%a = load <8 x i64>, <8 x i64>* %ap
|
||||||
%val = trunc <8 x i64> %a to <8 x i16>
|
%val = trunc <8 x i64> %a to <8 x i16>
|
||||||
store <8 x i16> %val, <8 x i16>* %dest
|
store <8 x i16> %val, <8 x i16>* %dest
|
||||||
|
@ -120,24 +113,26 @@ define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
|
define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v8i64i32:
|
; VBITS_GE_256-LABEL: store_trunc_v8i64i32:
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: st1w { [[Z0]].d }, p[[P0]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
||||||
; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s
|
;
|
||||||
; VBITS_EQ_256-DAG: splice [[WORDS:z[0-9]+]].s, [[PG1]], [[WORDS_LO]].s, [[WORDS_HI]].s
|
; VBITS_GE_512-LABEL: store_trunc_v8i64i32:
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl8
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: st1w { [[WORDS]].s }, [[PG3]], [x1]
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %ap
|
%a = load <8 x i64>, <8 x i64>* %ap
|
||||||
%val = trunc <8 x i64> %a to <8 x i32>
|
%val = trunc <8 x i64> %a to <8 x i32>
|
||||||
store <8 x i32> %val, <8 x i32>* %dest
|
store <8 x i32> %val, <8 x i32>* %dest
|
||||||
|
@ -145,25 +140,27 @@ define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
|
define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v16i32i8:
|
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
|
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
|
|
||||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].s }, p[[P0]], [x1]
|
|
||||||
; VBITS_GE_512-NEXT: ret
|
|
||||||
|
|
||||||
; Ensure sensible type legalisation.
|
|
||||||
; Currently does not use the truncating store
|
; Currently does not use the truncating store
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
; VBITS_GE_256-LABEL: store_trunc_v16i32i8:
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; VBITS_EQ_256-DAG: uzp1 z[[BYTES_LO:[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_EQ_256-DAG: uzp1 z[[BYTES_HI:[0-9]+]].b, [[HALFS_HI]].b, [[HALFS_HI]].b
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
; VBITS_EQ_256-NEXT: mov v[[BYTES_LO]].d[1], v[[BYTES_HI]].d[0]
|
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
; VBITS_EQ_256-NEXT: str q[[BYTES_LO]], [x1]
|
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
|
||||||
|
; VBITS_GE_256-NEXT: str q1, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: store_trunc_v16i32i8:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <16 x i32>, <16 x i32>* %ap
|
%a = load <16 x i32>, <16 x i32>* %ap
|
||||||
%val = trunc <16 x i32> %a to <16 x i8>
|
%val = trunc <16 x i32> %a to <16 x i8>
|
||||||
store <16 x i8> %val, <16 x i8>* %dest
|
store <16 x i8> %val, <16 x i8>* %dest
|
||||||
|
@ -171,24 +168,26 @@ define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
|
define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v16i32i16:
|
; VBITS_GE_256-LABEL: store_trunc_v16i32i16:
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: st1h { [[Z0]].s }, p[[P0]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
|
||||||
; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].h, vl8
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h
|
;
|
||||||
; VBITS_EQ_256-DAG: splice [[HALFS:z[0-9]+]].h, [[PG2]], [[HALFS_LO]].h, [[HALFS_HI]].h
|
; VBITS_GE_512-LABEL: store_trunc_v16i32i16:
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl16
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: st1h { [[HALFS]].h }, [[PG3]], [x1]
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <16 x i32>, <16 x i32>* %ap
|
%a = load <16 x i32>, <16 x i32>* %ap
|
||||||
%val = trunc <16 x i32> %a to <16 x i16>
|
%val = trunc <16 x i32> %a to <16 x i16>
|
||||||
store <16 x i16> %val, <16 x i16>* %dest
|
store <16 x i16> %val, <16 x i16>* %dest
|
||||||
|
@ -196,24 +195,26 @@ define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
|
define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
|
||||||
; CHECK-LABEL: store_trunc_v32i16i8:
|
; VBITS_GE_256-LABEL: store_trunc_v32i16i8:
|
||||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].h }, p[[P0]], [x1]
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
; Ensure sensible type legalisation
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl16
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
|
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
|
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[HALFS_LO:z[0-9]+]].h }, [[PG1]]/z, [x0]
|
; VBITS_GE_256-NEXT: splice z1.b, p0, z1.b, z0.b
|
||||||
; VBITS_EQ_256-DAG: ld1h { [[HALFS_HI:z[0-9]+]].h }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #1]
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].b, vl16
|
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x1]
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[BYTES_LO:z[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b
|
; VBITS_GE_256-NEXT: ret
|
||||||
; VBITS_EQ_256-DAG: uzp1 [[BYTES_HI:z[0-9]+]].b, [[HALFS_HI]].b, [[HALFS_HI]].b
|
;
|
||||||
; VBITS_EQ_256-DAG: splice [[BYTES:z[0-9]+]].b, [[PG2]], [[BYTES_LO]].b, [[BYTES_HI]].b
|
; VBITS_GE_512-LABEL: store_trunc_v32i16i8:
|
||||||
; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].b, vl32
|
; VBITS_GE_512: // %bb.0:
|
||||||
; VBITS_EQ_256-NEXT: st1b { [[BYTES]].b }, [[PG3]], [x1]
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_EQ_256-NEXT: ret
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <32 x i16>, <32 x i16>* %ap
|
%a = load <32 x i16>, <32 x i16>* %ap
|
||||||
%val = trunc <32 x i16> %a to <32 x i8>
|
%val = trunc <32 x i16> %a to <32 x i8>
|
||||||
store <32 x i8> %val, <32 x i8>* %dest
|
store <32 x i8> %val, <32 x i8>* %dest
|
||||||
|
|
|
@ -1,35 +1,22 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: z{0-9}
|
|
||||||
|
|
||||||
;
|
;
|
||||||
; truncate i16 -> i8
|
; truncate i16 -> i8
|
||||||
;
|
;
|
||||||
|
|
||||||
define <16 x i8> @trunc_v16i16_v16i8(<16 x i16>* %in) #0 {
|
define <16 x i8> @trunc_v16i16_v16i8(<16 x i16>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i16_v16i8:
|
; CHECK-LABEL: trunc_v16i16_v16i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1h { [[A_HALFS:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; CHECK-NEXT: uzp1 z0.b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <16 x i16>, <16 x i16>* %in
|
%a = load <16 x i16>, <16 x i16>* %in
|
||||||
%b = trunc <16 x i16> %a to <16 x i8>
|
%b = trunc <16 x i16> %a to <16 x i8>
|
||||||
ret <16 x i8> %b
|
ret <16 x i8> %b
|
||||||
|
@ -37,11 +24,30 @@ define <16 x i8> @trunc_v16i16_v16i8(<16 x i16>* %in) #0 {
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i16_v32i8(<32 x i16>* %in, <32 x i8>* %out) #0 {
|
define void @trunc_v32i16_v32i8(<32 x i16>* %in, <32 x i8>* %out) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i16_v32i8:
|
; VBITS_GE_256-LABEL: trunc_v32i16_v32i8:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512: ld1h { [[A_HALFS:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #16
|
||||||
; VBITS_GE_512: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_512: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||||
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl16
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
|
||||||
|
; VBITS_GE_256-NEXT: splice z1.b, p0, z1.b, z0.b
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
|
||||||
|
; VBITS_GE_256-NEXT: add z0.b, z1.b, z1.b
|
||||||
|
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v32i16_v32i8:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||||
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.b, vl32
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_512-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <32 x i16>, <32 x i16>* %in
|
%a = load <32 x i16>, <32 x i16>* %in
|
||||||
%b = trunc <32 x i16> %a to <32 x i8>
|
%b = trunc <32 x i16> %a to <32 x i8>
|
||||||
%c = add <32 x i8> %b, %b
|
%c = add <32 x i8> %b, %b
|
||||||
|
@ -50,12 +56,16 @@ define void @trunc_v32i16_v32i8(<32 x i16>* %in, <32 x i8>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v64i16_v64i8(<64 x i16>* %in, <64 x i8>* %out) #0 {
|
define void @trunc_v64i16_v64i8(<64 x i16>* %in, <64 x i8>* %out) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v64i16_v64i8:
|
; CHECK-LABEL: trunc_v64i16_v64i8:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024: ld1h { [[A_HALFS:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
; VBITS_GE_1024: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_1024: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; CHECK-NEXT: ptrue p0.b, vl64
|
||||||
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <64 x i16>, <64 x i16>* %in
|
%a = load <64 x i16>, <64 x i16>* %in
|
||||||
%b = trunc <64 x i16> %a to <64 x i8>
|
%b = trunc <64 x i16> %a to <64 x i8>
|
||||||
%c = add <64 x i8> %b, %b
|
%c = add <64 x i8> %b, %b
|
||||||
|
@ -64,12 +74,16 @@ define void @trunc_v64i16_v64i8(<64 x i16>* %in, <64 x i8>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v128i16_v128i8(<128 x i16>* %in, <128 x i8>* %out) #0 {
|
define void @trunc_v128i16_v128i8(<128 x i16>* %in, <128 x i8>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v128i16_v128i8:
|
; CHECK-LABEL: trunc_v128i16_v128i8:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1h { [[A_HALFS:z[0-9]+]].h }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.h, vl128
|
||||||
; VBITS_GE_2048: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; CHECK-NEXT: ptrue p0.b, vl128
|
||||||
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <128 x i16>, <128 x i16>* %in
|
%a = load <128 x i16>, <128 x i16>* %in
|
||||||
%b = trunc <128 x i16> %a to <128 x i8>
|
%b = trunc <128 x i16> %a to <128 x i8>
|
||||||
%c = add <128 x i8> %b, %b
|
%c = add <128 x i8> %b, %b
|
||||||
|
@ -81,38 +95,60 @@ define void @trunc_v128i16_v128i8(<128 x i16>* %in, <128 x i8>* %out) #0 {
|
||||||
; truncate i32 -> i8
|
; truncate i32 -> i8
|
||||||
;
|
;
|
||||||
|
|
||||||
define <8 x i8> @trunc_v8i32_v8i8(<8 x i32>* %in) #0 {
|
define <8 x i8> @trunc_v8i32_v8i8(<8 x i32>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v8i32_v8i8:
|
; CHECK-LABEL: trunc_v8i32_v8i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: uzp1 z0.b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <8 x i32>, <8 x i32>* %in
|
%a = load <8 x i32>, <8 x i32>* %in
|
||||||
%b = trunc <8 x i32> %a to <8 x i8>
|
%b = trunc <8 x i32> %a to <8 x i8>
|
||||||
ret <8 x i8> %b
|
ret <8 x i8> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %in) #0 {
|
define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %in) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i32_v16i8:
|
; VBITS_GE_256-LABEL: trunc_v16i32_v16i8:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512-NEXT: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512-NEXT: uzp1 z0.b, [[A_HALFS]].b, [[A_HALFS]].b
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z2.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.b, z1.b, z1.b
|
||||||
|
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v16i32_v16i8:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <16 x i32>, <16 x i32>* %in
|
%a = load <16 x i32>, <16 x i32>* %in
|
||||||
%b = trunc <16 x i32> %a to <16 x i8>
|
%b = trunc <16 x i32> %a to <16 x i8>
|
||||||
ret <16 x i8> %b
|
ret <16 x i8> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i32_v32i8(<32 x i32>* %in, <32 x i8>* %out) #0 {
|
define void @trunc_v32i32_v32i8(<32 x i32>* %in, <32 x i8>* %out) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i32_v32i8:
|
; CHECK-LABEL: trunc_v32i32_v32i8:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_1024: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i32>, <32 x i32>* %in
|
%a = load <32 x i32>, <32 x i32>* %in
|
||||||
%b = trunc <32 x i32> %a to <32 x i8>
|
%b = trunc <32 x i32> %a to <32 x i8>
|
||||||
%c = add <32 x i8> %b, %b
|
%c = add <32 x i8> %b, %b
|
||||||
|
@ -121,13 +157,17 @@ define void @trunc_v32i32_v32i8(<32 x i32>* %in, <32 x i8>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v64i32_v64i8(<64 x i32>* %in, <64 x i8>* %out) #0 {
|
define void @trunc_v64i32_v64i8(<64 x i32>* %in, <64 x i8>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v64i32_v64i8:
|
; CHECK-LABEL: trunc_v64i32_v64i8:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: ptrue p0.b, vl64
|
||||||
; VBITS_GE_2048: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <64 x i32>, <64 x i32>* %in
|
%a = load <64 x i32>, <64 x i32>* %in
|
||||||
%b = trunc <64 x i32> %a to <64 x i8>
|
%b = trunc <64 x i32> %a to <64 x i8>
|
||||||
%c = add <64 x i8> %b, %b
|
%c = add <64 x i8> %b, %b
|
||||||
|
@ -139,12 +179,14 @@ define void @trunc_v64i32_v64i8(<64 x i32>* %in, <64 x i8>* %out) #0 {
|
||||||
; truncate i32 -> i16
|
; truncate i32 -> i16
|
||||||
;
|
;
|
||||||
|
|
||||||
define <8 x i16> @trunc_v8i32_v8i16(<8 x i32>* %in) #0 {
|
define <8 x i16> @trunc_v8i32_v8i16(<8 x i32>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v8i32_v8i16:
|
; CHECK-LABEL: trunc_v8i32_v8i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl8
|
||||||
; CHECK-NEXT: uzp1 z0.h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <8 x i32>, <8 x i32>* %in
|
%a = load <8 x i32>, <8 x i32>* %in
|
||||||
%b = trunc <8 x i32> %a to <8 x i16>
|
%b = trunc <8 x i32> %a to <8 x i16>
|
||||||
ret <8 x i16> %b
|
ret <8 x i16> %b
|
||||||
|
@ -152,11 +194,30 @@ define <8 x i16> @trunc_v8i32_v8i16(<8 x i32>* %in) #0 {
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v16i32_v16i16(<16 x i32>* %in, <16 x i16>* %out) #0 {
|
define void @trunc_v16i32_v16i16(<16 x i32>* %in, <16 x i16>* %out) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i32_v16i16:
|
; VBITS_GE_256-LABEL: trunc_v16i32_v16i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #8
|
||||||
; VBITS_GE_512: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
; VBITS_GE_512: add [[A_HALFS]].h, [[A_HALFS]].h, [[A_HALFS]].h
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||||
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
||||||
|
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
||||||
|
; VBITS_GE_256-NEXT: add z0.h, z1.h, z1.h
|
||||||
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v16i32_v16i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||||
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: add z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <16 x i32>, <16 x i32>* %in
|
%a = load <16 x i32>, <16 x i32>* %in
|
||||||
%b = trunc <16 x i32> %a to <16 x i16>
|
%b = trunc <16 x i32> %a to <16 x i16>
|
||||||
%c = add <16 x i16> %b, %b
|
%c = add <16 x i16> %b, %b
|
||||||
|
@ -165,12 +226,16 @@ define void @trunc_v16i32_v16i16(<16 x i32>* %in, <16 x i16>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i32_v32i16(<32 x i32>* %in, <32 x i16>* %out) #0 {
|
define void @trunc_v32i32_v32i16(<32 x i32>* %in, <32 x i16>* %out) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i32_v32i16:
|
; CHECK-LABEL: trunc_v32i32_v32i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
; VBITS_GE_1024: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_1024: add [[A_HALFS]].h, [[A_HALFS]].h, [[A_HALFS]].h
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i32>, <32 x i32>* %in
|
%a = load <32 x i32>, <32 x i32>* %in
|
||||||
%b = trunc <32 x i32> %a to <32 x i16>
|
%b = trunc <32 x i32> %a to <32 x i16>
|
||||||
%c = add <32 x i16> %b, %b
|
%c = add <32 x i16> %b, %b
|
||||||
|
@ -179,12 +244,16 @@ define void @trunc_v32i32_v32i16(<32 x i32>* %in, <32 x i16>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v64i32_v64i16(<64 x i32>* %in, <64 x i16>* %out) #0 {
|
define void @trunc_v64i32_v64i16(<64 x i32>* %in, <64 x i16>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v64i32_v64i16:
|
; CHECK-LABEL: trunc_v64i32_v64i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1w { [[A_WORDS:z[0-9]+]].s }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.s, vl64
|
||||||
; VBITS_GE_2048: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: add [[A_HALFS]].h, [[A_HALFS]].h, [[A_HALFS]].h
|
; CHECK-NEXT: ptrue p0.h, vl64
|
||||||
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <64 x i32>, <64 x i32>* %in
|
%a = load <64 x i32>, <64 x i32>* %in
|
||||||
%b = trunc <64 x i32> %a to <64 x i16>
|
%b = trunc <64 x i32> %a to <64 x i16>
|
||||||
%c = add <64 x i16> %b, %b
|
%c = add <64 x i16> %b, %b
|
||||||
|
@ -197,53 +266,78 @@ define void @trunc_v64i32_v64i16(<64 x i32>* %in, <64 x i16>* %out) #0 {
|
||||||
;
|
;
|
||||||
|
|
||||||
; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
|
; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
|
||||||
define <4 x i8> @trunc_v4i64_v4i8(<4 x i64>* %in) #0 {
|
define <4 x i8> @trunc_v4i64_v4i8(<4 x i64>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v4i64_v4i8:
|
; CHECK-LABEL: trunc_v4i64_v4i8:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: uzp1 z0.h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <4 x i64>, <4 x i64>* %in
|
%a = load <4 x i64>, <4 x i64>* %in
|
||||||
%b = trunc <4 x i64> %a to <4 x i8>
|
%b = trunc <4 x i64> %a to <4 x i8>
|
||||||
ret <4 x i8> %b
|
ret <4 x i8> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %in) #0 {
|
define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %in) #0 {
|
||||||
; CHECK-LABEL: trunc_v8i64_v8i8:
|
; VBITS_GE_256-LABEL: trunc_v8i64_v8i8:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: uzp1 z0.b, [[A_HALFS]].b, [[A_HALFS]].b
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v8i64_v8i8:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %in
|
%a = load <8 x i64>, <8 x i64>* %in
|
||||||
%b = trunc <8 x i64> %a to <8 x i8>
|
%b = trunc <8 x i64> %a to <8 x i8>
|
||||||
ret <8 x i8> %b
|
ret <8 x i8> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %in) #0 {
|
define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %in) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i64_v16i8:
|
; CHECK-LABEL: trunc_v16i64_v16i8:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024-NEXT: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024-NEXT: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_1024-NEXT: uzp1 z0.b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
; VBITS_GE_1024-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <16 x i64>, <16 x i64>* %in
|
%a = load <16 x i64>, <16 x i64>* %in
|
||||||
%b = trunc <16 x i64> %a to <16 x i8>
|
%b = trunc <16 x i64> %a to <16 x i8>
|
||||||
ret <16 x i8> %b
|
ret <16 x i8> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i64_v32i8(<32 x i64>* %in, <32 x i8>* %out) #0 {
|
define void @trunc_v32i64_v32i8(<32 x i64>* %in, <32 x i8>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i64_v32i8:
|
; CHECK-LABEL: trunc_v32i64_v32i8:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ptrue p0.b, vl32
|
||||||
; VBITS_GE_2048: uzp1 [[A_BYTES:z[0-9]+]].b, [[A_HALFS]].b, [[A_HALFS]].b
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; VBITS_GE_2048: add [[A_BYTES]].b, [[A_BYTES]].b, [[A_BYTES]].b
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: add z0.b, z0.b, z0.b
|
||||||
|
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i64>, <32 x i64>* %in
|
%a = load <32 x i64>, <32 x i64>* %in
|
||||||
%b = trunc <32 x i64> %a to <32 x i8>
|
%b = trunc <32 x i64> %a to <32 x i8>
|
||||||
%c = add <32 x i8> %b, %b
|
%c = add <32 x i8> %b, %b
|
||||||
|
@ -255,38 +349,60 @@ define void @trunc_v32i64_v32i8(<32 x i64>* %in, <32 x i8>* %out) #0 {
|
||||||
; truncate i64 -> i16
|
; truncate i64 -> i16
|
||||||
;
|
;
|
||||||
|
|
||||||
define <4 x i16> @trunc_v4i64_v4i16(<4 x i64>* %in) #0 {
|
define <4 x i16> @trunc_v4i64_v4i16(<4 x i64>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v4i64_v4i16:
|
; CHECK-LABEL: trunc_v4i64_v4i16:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: uzp1 z0.h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <4 x i64>, <4 x i64>* %in
|
%a = load <4 x i64>, <4 x i64>* %in
|
||||||
%b = trunc <4 x i64> %a to <4 x i16>
|
%b = trunc <4 x i64> %a to <4 x i16>
|
||||||
ret <4 x i16> %b
|
ret <4 x i16> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %in) #0 {
|
define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %in) #0 {
|
||||||
; CHECK-LABEL: trunc_v8i64_v8i16:
|
; VBITS_GE_256-LABEL: trunc_v8i64_v8i16:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512-NEXT: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512-NEXT: uzp1 z0.h, [[A_WORDS]].h, [[A_WORDS]].h
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
; VBITS_GE_512-NEXT: ret
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
|
||||||
|
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
|
||||||
|
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v8i64_v8i16:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %in
|
%a = load <8 x i64>, <8 x i64>* %in
|
||||||
%b = trunc <8 x i64> %a to <8 x i16>
|
%b = trunc <8 x i64> %a to <8 x i16>
|
||||||
ret <8 x i16> %b
|
ret <8 x i16> %b
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v16i64_v16i16(<16 x i64>* %in, <16 x i16>* %out) #0 {
|
define void @trunc_v16i64_v16i16(<16 x i64>* %in, <16 x i16>* %out) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i64_v16i16:
|
; CHECK-LABEL: trunc_v16i64_v16i16:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ptrue p0.h, vl16
|
||||||
; VBITS_GE_1024: add [[A_HALFS]].h, [[A_HALFS]].h, [[A_HALFS]].h
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <16 x i64>, <16 x i64>* %in
|
%a = load <16 x i64>, <16 x i64>* %in
|
||||||
%b = trunc <16 x i64> %a to <16 x i16>
|
%b = trunc <16 x i64> %a to <16 x i16>
|
||||||
%c = add <16 x i16> %b, %b
|
%c = add <16 x i16> %b, %b
|
||||||
|
@ -295,13 +411,17 @@ define void @trunc_v16i64_v16i16(<16 x i64>* %in, <16 x i16>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i64_v32i16(<32 x i64>* %in, <32 x i16>* %out) #0 {
|
define void @trunc_v32i64_v32i16(<32 x i64>* %in, <32 x i16>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i64_v32i16:
|
; CHECK-LABEL: trunc_v32i64_v32i16:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: uzp1 [[A_HALFS:z[0-9]+]].h, [[A_WORDS]].h, [[A_WORDS]].h
|
; CHECK-NEXT: ptrue p0.h, vl32
|
||||||
; VBITS_GE_2048: add [[A_HALFS]].h, [[A_HALFS]].h, [[A_HALFS]].h
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: add z0.h, z0.h, z0.h
|
||||||
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i64>, <32 x i64>* %in
|
%a = load <32 x i64>, <32 x i64>* %in
|
||||||
%b = trunc <32 x i64> %a to <32 x i16>
|
%b = trunc <32 x i64> %a to <32 x i16>
|
||||||
%c = add <32 x i16> %b, %b
|
%c = add <32 x i16> %b, %b
|
||||||
|
@ -313,12 +433,14 @@ define void @trunc_v32i64_v32i16(<32 x i64>* %in, <32 x i16>* %out) #0 {
|
||||||
; truncate i64 -> i32
|
; truncate i64 -> i32
|
||||||
;
|
;
|
||||||
|
|
||||||
define <4 x i32> @trunc_v4i64_v4i32(<4 x i64>* %in) #0 {
|
define <4 x i32> @trunc_v4i64_v4i32(<4 x i64>* %in) vscale_range(2,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v4i64_v4i32:
|
; CHECK-LABEL: trunc_v4i64_v4i32:
|
||||||
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl4
|
||||||
; CHECK-NEXT: uzp1 z0.s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <4 x i64>, <4 x i64>* %in
|
%a = load <4 x i64>, <4 x i64>* %in
|
||||||
%b = trunc <4 x i64> %a to <4 x i32>
|
%b = trunc <4 x i64> %a to <4 x i32>
|
||||||
ret <4 x i32> %b
|
ret <4 x i32> %b
|
||||||
|
@ -326,11 +448,30 @@ define <4 x i32> @trunc_v4i64_v4i32(<4 x i64>* %in) #0 {
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v8i64_v8i32(<8 x i64>* %in, <8 x i32>* %out) #0 {
|
define void @trunc_v8i64_v8i32(<8 x i64>* %in, <8 x i32>* %out) #0 {
|
||||||
; CHECK-LABEL: trunc_v8i64_v8i32:
|
; VBITS_GE_256-LABEL: trunc_v8i64_v8i32:
|
||||||
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
|
; VBITS_GE_256: // %bb.0:
|
||||||
; VBITS_GE_512: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; VBITS_GE_256-NEXT: mov x8, #4
|
||||||
; VBITS_GE_512: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
||||||
; VBITS_GE_512: add [[A_WORDS]].s, [[A_WORDS]].s, [[A_WORDS]].s
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||||
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
||||||
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
||||||
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
||||||
|
; VBITS_GE_256-NEXT: add z0.s, z1.s, z1.s
|
||||||
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; VBITS_GE_256-NEXT: ret
|
||||||
|
;
|
||||||
|
; VBITS_GE_512-LABEL: trunc_v8i64_v8i32:
|
||||||
|
; VBITS_GE_512: // %bb.0:
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||||
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
|
||||||
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: add z0.s, z0.s, z0.s
|
||||||
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; VBITS_GE_512-NEXT: ret
|
||||||
%a = load <8 x i64>, <8 x i64>* %in
|
%a = load <8 x i64>, <8 x i64>* %in
|
||||||
%b = trunc <8 x i64> %a to <8 x i32>
|
%b = trunc <8 x i64> %a to <8 x i32>
|
||||||
%c = add <8 x i32> %b, %b
|
%c = add <8 x i32> %b, %b
|
||||||
|
@ -339,12 +480,16 @@ define void @trunc_v8i64_v8i32(<8 x i64>* %in, <8 x i32>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v16i64_v16i32(<16 x i64>* %in, <16 x i32>* %out) #0 {
|
define void @trunc_v16i64_v16i32(<16 x i64>* %in, <16 x i32>* %out) vscale_range(8,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v16i64_v16i32:
|
; CHECK-LABEL: trunc_v16i64_v16i32:
|
||||||
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_1024: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl16
|
||||||
; VBITS_GE_1024: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_1024: add [[A_WORDS]].s, [[A_WORDS]].s, [[A_WORDS]].s
|
; CHECK-NEXT: ptrue p0.s, vl16
|
||||||
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: add z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <16 x i64>, <16 x i64>* %in
|
%a = load <16 x i64>, <16 x i64>* %in
|
||||||
%b = trunc <16 x i64> %a to <16 x i32>
|
%b = trunc <16 x i64> %a to <16 x i32>
|
||||||
%c = add <16 x i32> %b, %b
|
%c = add <16 x i32> %b, %b
|
||||||
|
@ -353,12 +498,16 @@ define void @trunc_v16i64_v16i32(<16 x i64>* %in, <16 x i32>* %out) #0 {
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
|
||||||
define void @trunc_v32i64_v32i32(<32 x i64>* %in, <32 x i32>* %out) #0 {
|
define void @trunc_v32i64_v32i32(<32 x i64>* %in, <32 x i32>* %out) vscale_range(16,0) #0 {
|
||||||
; CHECK-LABEL: trunc_v32i64_v32i32:
|
; CHECK-LABEL: trunc_v32i64_v32i32:
|
||||||
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
|
; CHECK: // %bb.0:
|
||||||
; VBITS_GE_2048: ld1d { [[A_DWORDS:z[0-9]+]].d }, [[PG]]/z, [x0]
|
; CHECK-NEXT: ptrue p0.d, vl32
|
||||||
; VBITS_GE_2048: uzp1 [[A_WORDS:z[0-9]+]].s, [[A_DWORDS]].s, [[A_DWORDS]].s
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||||
; VBITS_GE_2048: add [[A_WORDS]].s, [[A_WORDS]].s, [[A_WORDS]].s
|
; CHECK-NEXT: ptrue p0.s, vl32
|
||||||
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: add z0.s, z0.s, z0.s
|
||||||
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%a = load <32 x i64>, <32 x i64>* %in
|
%a = load <32 x i64>, <32 x i64>* %in
|
||||||
%b = trunc <32 x i64> %a to <32 x i32>
|
%b = trunc <32 x i64> %a to <32 x i32>
|
||||||
%c = add <32 x i32> %b, %b
|
%c = add <32 x i32> %b, %b
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue