[AArch64] Add a tablegen pattern for SQXTN2.

Converts concat_vectors(Vd, trunc(smin(smax(Vm, -2^n), 2^n-1))) to
sqxtn2(Vd, Vm). This deliberately does not handle v2i64 ~> v2i32, because
the min/max nodes are not legal for that type (the same approach we took
for the SQXTN patterns in https://reviews.llvm.org/D103263).

Differential Revision: https://reviews.llvm.org/D116105
This commit is contained in:
Alexandros Lamprineas 2021-12-23 15:14:40 +00:00
parent fd3cde600b
commit e70ef6d924
3 changed files with 75 additions and 22 deletions

View File

@ -4378,6 +4378,32 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))),
(SQXTNv4i16 V128:$Vn)>;
// SQXTN2, v8i16 source into a v16i8 result:
//   concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
// The 64-bit $Vd is placed in the dsub subregister of an otherwise undefined
// register (INSERT_SUBREG of IMPLICIT_DEF) and passed as the first operand.
//
// Form 1: smax (lower bound, VImm80) applied first, then smin (upper bound,
// VImm7F).
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
(v8i16 VImm7F)))))),
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// Form 2: the same clamp with min/max reversed, i.e. smin (VImm7F) applied
// first, then smax (VImm80).
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
(v8i16 VImm80)))))),
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// SQXTN2, v4i32 source into a v8i16 result:
//   concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
// The 64-bit $Vd is placed in the dsub subregister of an otherwise undefined
// register (INSERT_SUBREG of IMPLICIT_DEF) and passed as the first operand.
//
// Form 1: smax (lower bound, VImm8000) applied first, then smin (upper bound,
// VImm7FFF).
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
(v4i32 VImm7FFF)))))),
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// Form 2: the same clamp with min/max reversed, i.e. smin (VImm7FFF) applied
// first, then smax (VImm8000).
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))))),
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
; Test the (concat_vectors (X), (trunc(smin(smax(Y, -2^n), 2^n-1)))) pattern.
; i16 -> i8 case, using the smin-then-smax ordering of the clamp: %y is
; clamped to [-128, 127], truncated to <8 x i8>, and concatenated after %x.
; The expected output is a single sqxtn2 with no separate smin/smax.
define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
; CHECK-LABEL: test_combine_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: sqxtn2 v0.16b, v1.8h
; CHECK-NEXT: ret
entry:
; Upper bound first (127), then lower bound (-128).
%min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %y, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
%max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %min, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
%trunc = trunc <8 x i16> %max to <8 x i8>
; Mask indices 0-15 select all of %x followed by all of %trunc, i.e. a
; concatenation of the two 8-element vectors.
%shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
; i32 -> i16 case, using the smax-then-smin ordering of the clamp: %y is
; clamped to [-32768, 32767], truncated to <4 x i16>, and concatenated after
; %x. The expected output is a single sqxtn2 with no separate smin/smax.
define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
; CHECK-LABEL: test_combine_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: sqxtn2 v0.8h, v1.4s
; CHECK-NEXT: ret
entry:
; Lower bound first (-32768), then upper bound (32767).
%max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
%min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %max, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
%trunc = trunc <4 x i32> %min to <4 x i16>
; Mask indices 0-7 select all of %x followed by all of %trunc, i.e. a
; concatenation of the two 4-element vectors.
%shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}
; Declarations of the signed min/max vector intrinsics called in this file.
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

View File

@ -315,17 +315,12 @@ entry:
define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-CVT-LABEL: stest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8
; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s
; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: stest_f16i16:
@ -1028,17 +1023,12 @@ entry:
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-CVT-LABEL: stest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8
; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s
; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s
; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: stest_f16i16_mm: