[RISCV] Teach RISCVTargetLowering::shouldSinkOperands to sink splats for FAdd/FSub/FMul/FDiv.

This commit is contained in:
Craig Topper 2021-09-20 10:21:42 -07:00
parent 890027b314
commit 04ab6c85ef
2 changed files with 70 additions and 78 deletions

View File

@ -1065,6 +1065,10 @@ bool RISCVTargetLowering::shouldSinkOperands(
case Instruction::Add: case Instruction::Add:
case Instruction::Sub: case Instruction::Sub:
case Instruction::Mul: case Instruction::Mul:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
return true; return true;
case Instruction::Shl: case Instruction::Shl:
case Instruction::LShr: case Instruction::LShr:

View File

@ -909,14 +909,13 @@ define void @sink_splat_fmul(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul: ; CHECK-LABEL: sink_splat_fmul:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB14_1: # %vector.body ; CHECK-NEXT: .LBB14_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmul.vv v26, v26, v25 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfmul.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB14_1 ; CHECK-NEXT: bnez a1, .LBB14_1
@ -947,14 +946,13 @@ define void @sink_splat_fdiv(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv: ; CHECK-LABEL: sink_splat_fdiv:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB15_1: # %vector.body ; CHECK-NEXT: .LBB15_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfdiv.vv v26, v26, v25 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfdiv.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB15_1 ; CHECK-NEXT: bnez a1, .LBB15_1
@ -985,14 +983,13 @@ define void @sink_splat_frdiv(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv: ; CHECK-LABEL: sink_splat_frdiv:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB16_1: # %vector.body ; CHECK-NEXT: .LBB16_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfdiv.vv v26, v25, v26 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfrdiv.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB16_1 ; CHECK-NEXT: bnez a1, .LBB16_1
@ -1023,14 +1020,13 @@ define void @sink_splat_fadd(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd: ; CHECK-LABEL: sink_splat_fadd:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB17_1: # %vector.body ; CHECK-NEXT: .LBB17_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfadd.vv v26, v26, v25 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfadd.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB17_1 ; CHECK-NEXT: bnez a1, .LBB17_1
@ -1061,14 +1057,13 @@ define void @sink_splat_fsub(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub: ; CHECK-LABEL: sink_splat_fsub:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB18_1: # %vector.body ; CHECK-NEXT: .LBB18_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfsub.vv v26, v26, v25 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfsub.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB18_1 ; CHECK-NEXT: bnez a1, .LBB18_1
@ -1099,14 +1094,13 @@ define void @sink_splat_frsub(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub: ; CHECK-LABEL: sink_splat_frsub:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: addi a1, zero, 1024 ; CHECK-NEXT: addi a1, zero, 1024
; CHECK-NEXT: .LBB19_1: # %vector.body ; CHECK-NEXT: .LBB19_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v26, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfsub.vv v26, v25, v26 ; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vse32.v v26, (a0) ; CHECK-NEXT: vfrsub.vf v25, v25, ft0
; CHECK-NEXT: vse32.v v25, (a0)
; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a1, a1, -4
; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bnez a1, .LBB19_1 ; CHECK-NEXT: bnez a1, .LBB19_1
@ -1136,8 +1130,8 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) { define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul_scalable: ; CHECK-LABEL: sink_splat_fmul_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB20_2 ; CHECK-NEXT: bgeu a4, a3, .LBB20_2
@ -1148,16 +1142,15 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB20_3: # %vector.body ; CHECK-NEXT: .LBB20_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfmul.vv v26, v26, v25 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfmul.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB20_3 ; CHECK-NEXT: bne a5, a1, .LBB20_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB20_7 ; CHECK-NEXT: beqz a6, .LBB20_7
@ -1230,8 +1223,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) { define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK-LABEL: sink_splat_fdiv_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB21_2 ; CHECK-NEXT: bgeu a4, a3, .LBB21_2
@ -1242,16 +1235,15 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB21_3: # %vector.body ; CHECK-NEXT: .LBB21_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfdiv.vv v26, v26, v25 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfdiv.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB21_3 ; CHECK-NEXT: bne a5, a1, .LBB21_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB21_7 ; CHECK-NEXT: beqz a6, .LBB21_7
@ -1324,8 +1316,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) { define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK-LABEL: sink_splat_frdiv_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB22_2 ; CHECK-NEXT: bgeu a4, a3, .LBB22_2
@ -1336,16 +1328,15 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB22_3: # %vector.body ; CHECK-NEXT: .LBB22_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfdiv.vv v26, v25, v26 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfrdiv.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB22_3 ; CHECK-NEXT: bne a5, a1, .LBB22_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB22_7 ; CHECK-NEXT: beqz a6, .LBB22_7
@ -1418,8 +1409,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) { define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK-LABEL: sink_splat_fadd_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB23_2 ; CHECK-NEXT: bgeu a4, a3, .LBB23_2
@ -1430,16 +1421,15 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB23_3: # %vector.body ; CHECK-NEXT: .LBB23_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfadd.vv v26, v26, v25 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfadd.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB23_3 ; CHECK-NEXT: bne a5, a1, .LBB23_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB23_7 ; CHECK-NEXT: beqz a6, .LBB23_7
@ -1512,8 +1502,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) { define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK-LABEL: sink_splat_fsub_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB24_2 ; CHECK-NEXT: bgeu a4, a3, .LBB24_2
@ -1524,16 +1514,15 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB24_3: # %vector.body ; CHECK-NEXT: .LBB24_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfsub.vv v26, v26, v25 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfsub.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB24_3 ; CHECK-NEXT: bne a5, a1, .LBB24_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB24_7 ; CHECK-NEXT: beqz a6, .LBB24_7
@ -1606,8 +1595,8 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) { define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK-LABEL: sink_splat_frsub_scalable:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a7, vlenb
; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: srli a3, a7, 2
; CHECK-NEXT: addi a4, zero, 1024 ; CHECK-NEXT: addi a4, zero, 1024
; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: fmv.w.x ft0, a1
; CHECK-NEXT: bgeu a4, a3, .LBB25_2 ; CHECK-NEXT: bgeu a4, a3, .LBB25_2
@ -1618,16 +1607,15 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
; CHECK-NEXT: mv a5, zero ; CHECK-NEXT: mv a5, zero
; CHECK-NEXT: remu a6, a4, a3 ; CHECK-NEXT: remu a6, a4, a3
; CHECK-NEXT: sub a1, a4, a6 ; CHECK-NEXT: sub a1, a4, a6
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: mv a4, a0 ; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB25_3: # %vector.body ; CHECK-NEXT: .LBB25_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v26, (a4) ; CHECK-NEXT: vl1re32.v v25, (a4)
; CHECK-NEXT: vfsub.vv v26, v25, v26 ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
; CHECK-NEXT: vs1r.v v26, (a4) ; CHECK-NEXT: vfrsub.vf v25, v25, ft0
; CHECK-NEXT: vs1r.v v25, (a4)
; CHECK-NEXT: add a5, a5, a3 ; CHECK-NEXT: add a5, a5, a3
; CHECK-NEXT: add a4, a4, a2 ; CHECK-NEXT: add a4, a4, a7
; CHECK-NEXT: bne a5, a1, .LBB25_3 ; CHECK-NEXT: bne a5, a1, .LBB25_3
; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a6, .LBB25_7 ; CHECK-NEXT: beqz a6, .LBB25_7