forked from OSchip/llvm-project
[AArch64] Add a tablegen pattern for SQXTN2.
Converts concat_vectors(Vd, trunc(smin(smax(Vm, -2^n), 2^n-1))) to sqxtn2(Vd, Vm). The v2i64 ~> v2i32 case is deliberately not handled because the min/max nodes are not legal (the same choice we made for the SQXTN patterns in https://reviews.llvm.org/D103263). Differential Revision: https://reviews.llvm.org/D116105
This commit is contained in:
parent
fd3cde600b
commit
e70ef6d924
|
@ -4378,6 +4378,32 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
|
|||
(v4i32 VImm8000)))),
|
||||
(SQXTNv4i16 V128:$Vn)>;
|
||||
|
||||
// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
|
||||
// with reversed min/max
|
||||
// Selection pattern: a v16i8 concat_vectors of $Vd with the v8i8 truncation
// of $Vn, where $Vn is first clamped by smax against VImm80 and then by smin
// against VImm7F, is emitted as a single SQXTNv16i8.
// $Vd is supplied through INSERT_SUBREG into the dsub subregister of an
// IMPLICIT_DEF so that it forms the instruction's first operand.
def : Pat<(v16i8 (concat_vectors
|
||||
(v8i8 V64:$Vd),
|
||||
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
|
||||
(v8i16 VImm7F)))))),
|
||||
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
|
||||
// Selection pattern for the opposite clamp order: a v16i8 concat_vectors of
// $Vd with the v8i8 truncation of $Vn, where $Vn is first clamped by smin
// against VImm7F and then by smax against VImm80, is emitted as a single
// SQXTNv16i8.
// $Vd is supplied through INSERT_SUBREG into the dsub subregister of an
// IMPLICIT_DEF so that it forms the instruction's first operand.
def : Pat<(v16i8 (concat_vectors
|
||||
(v8i8 V64:$Vd),
|
||||
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
|
||||
(v8i16 VImm80)))))),
|
||||
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
|
||||
|
||||
// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
|
||||
// with reversed min/max
|
||||
// Selection pattern: a v8i16 concat_vectors of $Vd with the v4i16 truncation
// of $Vn, where $Vn is first clamped by smax against VImm8000 and then by
// smin against VImm7FFF, is emitted as a single SQXTNv8i16.
// $Vd is supplied through INSERT_SUBREG into the dsub subregister of an
// IMPLICIT_DEF so that it forms the instruction's first operand.
def : Pat<(v8i16 (concat_vectors
|
||||
(v4i16 V64:$Vd),
|
||||
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
|
||||
(v4i32 VImm7FFF)))))),
|
||||
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
|
||||
// Selection pattern for the opposite clamp order: a v8i16 concat_vectors of
// $Vd with the v4i16 truncation of $Vn, where $Vn is first clamped by smin
// against VImm7FFF and then by smax against VImm8000, is emitted as a single
// SQXTNv8i16.
// $Vd is supplied through INSERT_SUBREG into the dsub subregister of an
// IMPLICIT_DEF so that it forms the instruction's first operand.
def : Pat<(v8i16 (concat_vectors
|
||||
(v4i16 V64:$Vd),
|
||||
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
|
||||
(v4i32 VImm8000)))))),
|
||||
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Advanced SIMD three vector instructions.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
|
||||
|
||||
; Test the (concat_vectors (X), (trunc(smin(smax(Y, -2^n), 2^n-1))) pattern.
|
||||
|
||||
; Clamps %y with smin against 127 and then smax against -128, truncates the
; result to <8 x i8>, and places it after %x via a shufflevector whose mask
; selects elements 0-15 in order. The CHECK lines expect the function body
; to be a single sqxtn2 followed by ret.
define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
|
||||
; CHECK-LABEL: test_combine_v8i16_to_v16i8:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: sqxtn2 v0.16b, v1.8h
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %y, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
|
||||
%max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %min, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
|
||||
%trunc = trunc <8 x i16> %max to <8 x i8>
|
||||
%shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <16 x i8> %shuffle
|
||||
}
|
||||
|
||||
; Clamps %y with smax against -32768 and then smin against 32767, truncates
; the result to <4 x i16>, and places it after %x via a shufflevector whose
; mask selects elements 0-7 in order. The CHECK lines expect the function
; body to be a single sqxtn2 followed by ret.
define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: test_combine_v4i32_to_v8i16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: sqxtn2 v0.8h, v1.4s
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
|
||||
%min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %max, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
|
||||
%trunc = trunc <4 x i32> %min to <4 x i16>
|
||||
%shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
|
|
@ -315,17 +315,12 @@ entry:
|
|||
define <8 x i16> @stest_f16i16(<8 x half> %x) {
|
||||
; CHECK-CVT-LABEL: stest_f16i16:
|
||||
; CHECK-CVT: // %bb.0: // %entry
|
||||
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
|
||||
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
|
||||
; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
|
||||
; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8
|
||||
; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
|
||||
; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
|
||||
; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s
|
||||
; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s
|
||||
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
|
||||
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
|
||||
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
|
||||
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
|
||||
; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s
|
||||
; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s
|
||||
; CHECK-CVT-NEXT: ret
|
||||
;
|
||||
; CHECK-FP16-LABEL: stest_f16i16:
|
||||
|
@ -1028,17 +1023,12 @@ entry:
|
|||
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
|
||||
; CHECK-CVT-LABEL: stest_f16i16_mm:
|
||||
; CHECK-CVT: // %bb.0: // %entry
|
||||
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
|
||||
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
|
||||
; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
|
||||
; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8
|
||||
; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
|
||||
; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
|
||||
; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s
|
||||
; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s
|
||||
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
|
||||
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
|
||||
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
|
||||
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
|
||||
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
|
||||
; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s
|
||||
; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s
|
||||
; CHECK-CVT-NEXT: ret
|
||||
;
|
||||
; CHECK-FP16-LABEL: stest_f16i16_mm:
|
||||
|
|
Loading…
Reference in New Issue