[RISCV] Fold store of vmv.f.s to a vse with VL=1.
This patch supports the FP part of D109482.

Differential Revision: https://reviews.llvm.org/D120235
parent 77fd54d2eb
commit 44a430354d
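For illustration, here is a minimal IR sketch of the kind of code the fold targets (the function name is made up for this sketch; the actual CodeGen expectations are in the tests added below): the scalar produced by the vfmv.f.s intrinsic is stored straight to memory, so the scalar move plus scalar FP store can be selected as a single vse with VL=1.

; Illustrative sketch, not part of the patch.
define void @store_vfmv_f_s_sketch(<vscale x 8 x double> %v, double* %p) {
  ; The moved scalar is only used by the store, so instruction selection can
  ; emit a single VL=1 vse64.v of %v instead of vfmv.f.s followed by fsd.
  %s = call double @llvm.riscv.vfmv.f.s.nxv8f64(<vscale x 8 x double> %v)
  store double %s, double* %p
  ret void
}

declare double @llvm.riscv.vfmv.f.s.nxv8f64(<vscale x 8 x double>)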
@@ -954,6 +954,13 @@ foreach fvti = AllFloatVectors in {
//===----------------------------------------------------------------------===//
let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
  // Fold store of vmv.f.s to a vse with VL=1.
  defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX);
  def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), BaseAddr:$rs1),
            (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;
  def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), BaseAddr:$rs1),
            (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;

  defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
                                                      vti.ScalarSuffix,
                                                      "_S_", vti.LMul.MX));
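The second pattern above handles the case where the scalar comes from a plain extractelement of lane 0, which needs no vslidedown at all. A hypothetical example of that shape (the function name is invented; this case is not among the tests shown below):

; Illustrative sketch, not part of the patch's tests.
define void @store_extractelt_idx0_sketch(<vscale x 8 x double> %v, double* %p) {
  ; Lane 0 is read and stored directly, so the store can likewise be selected
  ; as a VL=1 vse64.v without first moving the element to a scalar FP register.
  %e = extractelement <vscale x 8 x double> %v, i64 0
  store double %e, double* %p
  ret void
}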
@@ -481,3 +481,32 @@ define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 signext %idx
  %r = extractelement <vscale x 8 x double> %v, i32 %idx
  ret double %r
}

define void @store_extractelt_nxv8f64(<vscale x 8 x double>* %x, double* %p) {
; CHECK-LABEL: store_extractelt_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <vscale x 8 x double>, <vscale x 8 x double>* %x
  %b = extractelement <vscale x 8 x double> %a, i64 1
  store double %b, double* %p
  ret void
}

define void @store_vfmv_f_s_nxv8f64(<vscale x 8 x double>* %x, double* %p) {
; CHECK-LABEL: store_vfmv_f_s_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <vscale x 8 x double>, <vscale x 8 x double>* %x
  %b = call double @llvm.riscv.vfmv.f.s.nxv8f64(<vscale x 8 x double> %a)
  store double %b, double* %p
  ret void
}

declare double @llvm.riscv.vfmv.f.s.nxv8f64(<vscale x 8 x double>)
@@ -596,3 +596,18 @@ define void @store_extractelt_v4i64(<2 x i64>* %x, i64* %p) nounwind {
  store i64 %b, i64* %p
  ret void
}

define void @store_extractelt_v4f64(<2 x double>* %x, double* %p) nounwind {
; CHECK-LABEL: store_extractelt_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, <2 x double>* %x
  %b = extractelement <2 x double> %a, i64 1
  store double %b, double* %p
  ret void
}
@@ -58,18 +58,17 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    addi sp, sp, -32
; LMULMAX2-NEXT:    .cfi_def_cfa_offset 32
; LMULMAX2-NEXT:    vsetivli zero, 0, e32, m2, ta, mu
; LMULMAX2-NEXT:    vfmv.f.s ft0, v10
; LMULMAX2-NEXT:    fsw ft0, 24(sp)
; LMULMAX2-NEXT:    vfmv.f.s ft0, v8
; LMULMAX2-NEXT:    fsw ft0, 16(sp)
; LMULMAX2-NEXT:    addi a0, sp, 24
; LMULMAX2-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    vslidedown.vi v10, v10, 7
; LMULMAX2-NEXT:    vfmv.f.s ft0, v10
; LMULMAX2-NEXT:    fsw ft0, 28(sp)
; LMULMAX2-NEXT:    vslidedown.vi v8, v8, 7
; LMULMAX2-NEXT:    vfmv.f.s ft0, v8
; LMULMAX2-NEXT:    fsw ft0, 20(sp)
; LMULMAX2-NEXT:    addi a0, sp, 28
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    vslidedown.vi v10, v8, 7
; LMULMAX2-NEXT:    addi a0, sp, 20
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    addi a0, sp, 16
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX2-NEXT:    addi a0, sp, 16
; LMULMAX2-NEXT:    vle32.v v8, (a0)
@@ -413,52 +413,55 @@ declare <33 x double> @llvm.vp.load.v33f64.p0v33f64(<33 x double>*, <33 x i1>, i
define <33 x double> @vpload_v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v33f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a4, a2, -32
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    li a3, 0
; CHECK-NEXT:    li a5, 0
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB32_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a5, a4
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:  .LBB32_2:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    bltu a5, a4, .LBB32_4
; CHECK-NEXT:    addi a5, a3, -16
; CHECK-NEXT:    li a4, 0
; CHECK-NEXT:    bltu a3, a5, .LBB32_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    li a5, 16
; CHECK-NEXT:    mv a4, a5
; CHECK-NEXT:  .LBB32_4:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, mu
; CHECK-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-NEXT:    addi a6, a1, 256
; CHECK-NEXT:    vsetvli zero, a5, e64, m8, ta, mu
; CHECK-NEXT:    vle64.v v16, (a6), v0.t
; CHECK-NEXT:    li a5, 32
; CHECK-NEXT:    bltu a2, a5, .LBB32_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:  .LBB32_6:
; CHECK-NEXT:    addi a5, a2, -16
; CHECK-NEXT:    bltu a2, a5, .LBB32_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    mv a3, a5
; CHECK-NEXT:  .LBB32_8:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT:    vslidedown.vi v0, v8, 2
; CHECK-NEXT:    addi a5, a1, 128
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT:    vle64.v v24, (a5), v0.t
; CHECK-NEXT:    bltu a2, a4, .LBB32_10
; CHECK-NEXT:  # %bb.9:
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT:    vle64.v v16, (a5), v0.t
; CHECK-NEXT:    addi a5, a2, -32
; CHECK-NEXT:    li a4, 0
; CHECK-NEXT:    bltu a2, a5, .LBB32_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    mv a4, a5
; CHECK-NEXT:  .LBB32_6:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:    bltu a4, a2, .LBB32_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:  .LBB32_8:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, mu
; CHECK-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-NEXT:    addi a5, a1, 256
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT:    vle64.v v24, (a5), v0.t
; CHECK-NEXT:    bltu a3, a2, .LBB32_10
; CHECK-NEXT:  # %bb.9:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB32_10:
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vle64.v v8, (a1), v0.t
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    addi a1, a0, 256
; CHECK-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
; CHECK-NEXT:    vse64.v v24, (a1)
; CHECK-NEXT:    vfmv.f.s ft0, v16
; CHECK-NEXT:    fsd ft0, 256(a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %load = call <33 x double> @llvm.vp.load.v33f64.p0v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 %evl)
  ret <33 x double> %load