forked from OSchip/llvm-project
[llvm][AArch64][SVE] Remove extra fmov instruction with certain literals
When an SVE vector is initialised with a floating-point literal that cannot be encoded in the immediate form of the fmov instruction, an extra, unnecessary fmov is currently generated. This patch adds codegen patterns that prevent the extra instruction from being generated. Differential Revision: https://reviews.llvm.org/D96700 Co-Authored-By: Paul Walker <paul.walker@arm.com>
This commit is contained in:
parent
ed86328515
commit
e86f9ba15c
|
@ -553,6 +553,14 @@ let Predicates = [HasSVE] in {
|
|||
def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
|
||||
(DUP_ZI_D $a, $b)>;
|
||||
|
||||
// Duplicate an arbitrary FP immediate into all vector elements.
// The immediate is reinterpreted as its integer bit pattern
// (bitcast_fpimm_to_i32 / bitcast_fpimm_to_i64), materialised with
// MOVi32imm / MOVi64imm, and then splatted with DUP_ZR_S / DUP_ZR_D.
// NOTE(review): presumably the AddedComplexity patterns that follow take
// priority for immediates that do have a direct encoding -- confirm.
|
||||
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
|
||||
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
|
||||
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
|
||||
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
|
||||
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
|
||||
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
|
||||
|
||||
// Duplicate FP immediate into all vector elements
|
||||
let AddedComplexity = 2 in {
|
||||
def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
|
||||
|
|
|
@ -130,12 +130,37 @@ define <vscale x 2 x double> @dup_imm_f64(double %b) {
|
|||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 2 x float> via the sve.dup.x
; intrinsic. The expected code moves the float's bit pattern
; (1109917696 == 0x42280000) into w8 and duplicates it into z0.s on the
; immediately following instruction.
define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
|
||||
; CHECK-LABEL: dup_fmov_imm_f32_2:
|
||||
; CHECK: mov w8, #1109917696
|
||||
; CHECK-NEXT: mov z0.s, w8
|
||||
%out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
|
||||
ret <vscale x 2 x float> %out
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 4 x float> via the sve.dup.x
; intrinsic. The expected code moves the float's bit pattern
; (1109917696 == 0x42280000) into w8 and duplicates it into z0.s on the
; immediately following instruction.
define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
|
||||
; CHECK-LABEL: dup_fmov_imm_f32_4:
|
||||
; CHECK: mov w8, #1109917696
|
||||
; CHECK-NEXT: mov z0.s, w8
|
||||
%out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 2 x double> via the sve.dup.x
; intrinsic. The expected code moves the double's bit pattern
; (4631107791820423168 == 0x4045000000000000) into x8 and duplicates it
; into z0.d on the immediately following instruction.
define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
|
||||
; CHECK-LABEL: dup_fmov_imm_f64_2:
|
||||
; CHECK: mov x8, #4631107791820423168
|
||||
; CHECK-NEXT: mov z0.d, x8
|
||||
%out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8( i8)
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
|
||||
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
|
||||
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
|
||||
|
||||
|
|
|
@ -372,5 +372,32 @@ define <vscale x 4 x float> @splat_nxv4f32_fold(<vscale x 4 x float> %x) {
|
|||
ret <vscale x 4 x float> %r
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 2 x float>, written as an
; insertelement into lane 0 followed by a shufflevector with a
; zeroinitializer mask. The expected code moves the float's bit pattern
; (1109917696 == 0x42280000) into w8 and duplicates it into z0.s on the
; immediately following instruction.
define <vscale x 2 x float> @splat_nxv2f32_fmov_fold() {
|
||||
; CHECK-LABEL: splat_nxv2f32_fmov_fold
|
||||
; CHECK: mov w8, #1109917696
|
||||
; CHECK-NEXT: mov z0.s, w8
|
||||
%1 = insertelement <vscale x 2 x float> undef, float 4.200000e+01, i32 0
|
||||
%2 = shufflevector <vscale x 2 x float> %1, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
|
||||
ret <vscale x 2 x float> %2
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 4 x float>, written as an
; insertelement into lane 0 followed by a shufflevector with a
; zeroinitializer mask. The expected code moves the float's bit pattern
; (1109917696 == 0x42280000) into w8 and duplicates it into z0.s on the
; immediately following instruction.
define <vscale x 4 x float> @splat_nxv4f32_fmov_fold() {
|
||||
; CHECK-LABEL: splat_nxv4f32_fmov_fold
|
||||
; CHECK: mov w8, #1109917696
|
||||
; CHECK-NEXT: mov z0.s, w8
|
||||
%1 = insertelement <vscale x 4 x float> undef, float 4.200000e+01, i32 0
|
||||
%2 = shufflevector <vscale x 4 x float> %1, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
|
||||
ret <vscale x 4 x float> %2
|
||||
}
|
||||
|
||||
; Splat the constant 42.0 into <vscale x 2 x double>, written as an
; insertelement into lane 0 followed by a shufflevector with a
; zeroinitializer mask. The expected code moves the double's bit pattern
; (4631107791820423168 == 0x4045000000000000) into x8 and duplicates it
; into z0.d on the immediately following instruction.
define <vscale x 2 x double> @splat_nxv2f64_fmov_fold() {
|
||||
; CHECK-LABEL: splat_nxv2f64_fmov_fold
|
||||
; CHECK: mov x8, #4631107791820423168
|
||||
; CHECK-NEXT: mov z0.d, x8
|
||||
%1 = insertelement <vscale x 2 x double> undef, double 4.200000e+01, i32 0
|
||||
%2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
|
||||
ret <vscale x 2 x double> %2
|
||||
}
|
||||
|
||||
; +bf16 is required for the bfloat version.
|
||||
attributes #0 = { "target-features"="+sve,+bf16" }
|
||||
|
|
Loading…
Reference in New Issue