[SVE] Use DUPM to handle more splat immediate cases.

NOTE: Only considers i64 based vectors at this time because smaller
element types require extra isel operand parsing.

Differential Revision: https://reviews.llvm.org/D118040
This commit is contained in:
Paul Walker 2022-01-24 12:35:18 +00:00
parent 5da7c04003
commit 66bd7ebdf7
6 changed files with 31 additions and 30 deletions

View File

@ -1708,6 +1708,9 @@ multiclass sve_int_dup_mask_imm<string asm> {
(!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
def : InstAlias<"mov $Zd, $imm",
(!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
(!cast<Instruction>(NAME) logical_imm64:$imm)>;
}
//===----------------------------------------------------------------------===//

View File

@ -133,9 +133,8 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@ -277,9 +276,8 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@ -385,9 +383,8 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@ -493,9 +490,8 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@ -627,9 +623,8 @@ define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: mul_i64_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #255
; CHECK-NEXT: mov z1.d, #255 // =0xff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0

View File

@ -514,9 +514,8 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@ -832,9 +831,8 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@ -991,9 +989,8 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

View File

@ -73,8 +73,8 @@ define <vscale x 4 x i32> @sve_splat_4xi32_imm() {
ret <vscale x 4 x i32> %splat
}
define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
; CHECK-LABEL: sve_splat_2xi64_imm:
define <vscale x 2 x i64> @sve_splat_2xi64_dup_imm() {
; CHECK-LABEL: sve_splat_2xi64_dup_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #1 // =0x1
; CHECK-NEXT: ret
@ -83,6 +83,16 @@ define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
ret <vscale x 2 x i64> %splat
}
;; Splat the i64 constant 0xffff00000000 across a <vscale x 2 x i64> vector.
;; The CHECK lines expect the whole splat to be emitted as a single immediate
;; `mov z0.d, #imm`, with no scalar register involved. Going by the test name,
;; this is meant to exercise the DUPM (logical-immediate) encoding.
define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
; CHECK-LABEL: sve_splat_2xi64_dupm_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #0xffff00000000
; CHECK-NEXT: ret
%ins = insertelement <vscale x 2 x i64> undef, i64 281470681743360, i32 0 ; 0xffff00000000
%splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x i64> %splat
}
;; Promote splats of smaller illegal integer vector types
define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
@ -173,8 +183,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
; CHECK-LABEL: sve_splat_2xi32_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-1
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: ret
%ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
%splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@ -530,9 +539,9 @@ define <vscale x 4 x float> @splat_nxv4f32_imm_out_of_range() {
define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI50_0
; CHECK-NEXT: adrp x8, .LCPI51_0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, x8, :lo12:.LCPI50_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI51_0
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
%1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0

View File

@ -144,10 +144,9 @@ ret <vscale x 4 x i32> %sel
define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
; CHECK-LABEL: sel_64_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: mov z1.d, #128 // =0x80
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
@ -370,8 +369,7 @@ ret <vscale x 4 x i32> %sel
define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
; CHECK-LABEL: sel_merge_64_illegal_wrong_extension:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z1.d, #128 // =0x80
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer

View File

@ -59,8 +59,7 @@ define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: mul_i64_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #255
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov z1.d, #255 // =0xff
; CHECK-NEXT: mul z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0