forked from OSchip/llvm-project
[SVE] Use DUPM to handle more splat immediate cases.
NOTE: This only considers i64-based vectors at this time, because smaller element types require extra isel operand parsing. Differential Revision: https://reviews.llvm.org/D118040
This commit is contained in:
parent
5da7c04003
commit
66bd7ebdf7
|
@ -1708,6 +1708,9 @@ multiclass sve_int_dup_mask_imm<string asm> {
|
|||
(!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
|
||||
def : InstAlias<"mov $Zd, $imm",
|
||||
(!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
|
||||
|
||||
def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
|
||||
(!cast<Instruction>(NAME) logical_imm64:$imm)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -133,9 +133,8 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: smax_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
|
||||
|
@ -277,9 +276,8 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: smin_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
|
||||
|
@ -385,9 +383,8 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: umax_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
|
||||
|
@ -493,9 +490,8 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: umin_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
|
||||
|
@ -627,9 +623,8 @@ define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
|
|||
define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: mul_i64_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #255
|
||||
; CHECK-NEXT: mov z1.d, #255 // =0xff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
|
||||
|
|
|
@ -514,9 +514,8 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: smax_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
|
@ -832,9 +831,8 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: umax_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
|
@ -991,9 +989,8 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
|
|||
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: umin_i64_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #65535
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
|
||||
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
|
||||
|
|
|
@ -73,8 +73,8 @@ define <vscale x 4 x i32> @sve_splat_4xi32_imm() {
|
|||
ret <vscale x 4 x i32> %splat
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
|
||||
; CHECK-LABEL: sve_splat_2xi64_imm:
|
||||
define <vscale x 2 x i64> @sve_splat_2xi64_dup_imm() {
|
||||
; CHECK-LABEL: sve_splat_2xi64_dup_imm:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -83,6 +83,16 @@ define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
|
|||
ret <vscale x 2 x i64> %splat
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
|
||||
; CHECK-LABEL: sve_splat_2xi64_dupm_imm:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #0xffff00000000
|
||||
; CHECK-NEXT: ret
|
||||
%ins = insertelement <vscale x 2 x i64> undef, i64 281470681743360, i32 0 ; 0xffff00000000
|
||||
%splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
|
||||
ret <vscale x 2 x i64> %splat
|
||||
}
|
||||
|
||||
;; Promote splats of smaller illegal integer vector types
|
||||
|
||||
define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
|
||||
|
@ -173,8 +183,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
|
|||
define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
|
||||
; CHECK-LABEL: sve_splat_2xi32_imm:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #-1
|
||||
; CHECK-NEXT: mov z0.d, x8
|
||||
; CHECK-NEXT: mov z0.d, #0xffffffff
|
||||
; CHECK-NEXT: ret
|
||||
%ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
|
||||
%splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
|
||||
|
@ -530,9 +539,9 @@ define <vscale x 4 x float> @splat_nxv4f32_imm_out_of_range() {
|
|||
define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
|
||||
; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI50_0
|
||||
; CHECK-NEXT: adrp x8, .LCPI51_0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: add x8, x8, :lo12:.LCPI50_0
|
||||
; CHECK-NEXT: add x8, x8, :lo12:.LCPI51_0
|
||||
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
|
||||
; CHECK-NEXT: ret
|
||||
%1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0
|
||||
|
|
|
@ -144,10 +144,9 @@ ret <vscale x 4 x i32> %sel
|
|||
define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
|
||||
; CHECK-LABEL: sel_64_illegal_wrong_extension:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #128
|
||||
; CHECK-NEXT: mov z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z0.d, x8
|
||||
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.d, #128 // =0x80
|
||||
; CHECK-NEXT: mov z0.d, p0/m, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
|
||||
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
|
||||
|
@ -370,8 +369,7 @@ ret <vscale x 4 x i32> %sel
|
|||
define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
|
||||
; CHECK-LABEL: sel_merge_64_illegal_wrong_extension:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #128
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #128 // =0x80
|
||||
; CHECK-NEXT: mov z0.d, p0/m, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
|
||||
|
|
|
@ -59,8 +59,7 @@ define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
|
|||
define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
|
||||
; CHECK-LABEL: mul_i64_imm:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #255
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #255 // =0xff
|
||||
; CHECK-NEXT: mul z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
|
||||
|
|
Loading…
Reference in New Issue