forked from OSchip/llvm-project
[RISCV] Use _COMMUTABLE FMA pseudos for fixed-length vectors.
This matches what the VLMAX SDNode patterns already do.
This commit is contained in:
parent
3d8823b8e4
commit
b49aaed8c7
|
@ -191,10 +191,11 @@ defm "" : VPatBinaryFPVL_VV_VF<riscv_fdiv_vl, "PseudoVFDIV">;
|
|||
foreach vti = AllFloatVectors in {
|
||||
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
|
||||
// works best with how TwoAddressInstructionPass tries commuting.
|
||||
defvar suffix = vti.LMul.MX # "_COMMUTABLE";
|
||||
def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rd, vti.RegClass:$rs1,
|
||||
vti.RegClass:$rs2, (vti.Mask true_mask),
|
||||
(XLenVT (VLOp GPR:$vl)))),
|
||||
(!cast<Instruction>("PseudoVFMADD_VV_"# vti.LMul.MX)
|
||||
(!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
|
||||
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
|
||||
GPR:$vl, vti.SEW)>;
|
||||
}
|
||||
|
|
|
@ -257,14 +257,12 @@ define void @fma_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x half>* %z) {
|
|||
; CHECK-LABEL: fma_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a3, zero, 8
|
||||
; CHECK-NEXT: vsetvli a4, a3, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vsetvli a3, a3, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vle16.v v26, (a1)
|
||||
; CHECK-NEXT: vle16.v v27, (a2)
|
||||
; CHECK-NEXT: vsetvli a1, a3, e16,m1,tu,mu
|
||||
; CHECK-NEXT: vfmadd.vv v25, v26, v27
|
||||
; CHECK-NEXT: vsetvli a1, a3, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: vfmacc.vv v27, v25, v26
|
||||
; CHECK-NEXT: vse16.v v27, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = load <8 x half>, <8 x half>* %y
|
||||
|
@ -279,14 +277,12 @@ define void @fma_v4f32(<4 x float>* %x, <4 x float>* %y, <4 x float>* %z) {
|
|||
; CHECK-LABEL: fma_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a3, zero, 4
|
||||
; CHECK-NEXT: vsetvli a4, a3, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vsetvli a3, a3, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vle32.v v26, (a1)
|
||||
; CHECK-NEXT: vle32.v v27, (a2)
|
||||
; CHECK-NEXT: vsetvli a1, a3, e32,m1,tu,mu
|
||||
; CHECK-NEXT: vfmadd.vv v25, v26, v27
|
||||
; CHECK-NEXT: vsetvli a1, a3, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: vfmacc.vv v27, v25, v26
|
||||
; CHECK-NEXT: vse32.v v27, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = load <4 x float>, <4 x float>* %y
|
||||
|
@ -301,14 +297,12 @@ define void @fma_v2f64(<2 x double>* %x, <2 x double>* %y, <2 x double>* %z) {
|
|||
; CHECK-LABEL: fma_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a3, zero, 2
|
||||
; CHECK-NEXT: vsetvli a4, a3, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vsetvli a3, a3, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vle64.v v26, (a1)
|
||||
; CHECK-NEXT: vle64.v v27, (a2)
|
||||
; CHECK-NEXT: vsetvli a1, a3, e64,m1,tu,mu
|
||||
; CHECK-NEXT: vfmadd.vv v25, v26, v27
|
||||
; CHECK-NEXT: vsetvli a1, a3, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: vfmacc.vv v27, v25, v26
|
||||
; CHECK-NEXT: vse64.v v27, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = load <2 x double>, <2 x double>* %y
|
||||
|
@ -995,35 +989,31 @@ define void @fma_v16f16(<16 x half>* %x, <16 x half>* %y, <16 x half>* %z) {
|
|||
; LMULMAX2-LABEL: fma_v16f16:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a3, zero, 16
|
||||
; LMULMAX2-NEXT: vsetvli a4, a3, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vsetvli a3, a3, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vle16.v v30, (a2)
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e16,m2,tu,mu
|
||||
; LMULMAX2-NEXT: vfmadd.vv v26, v28, v30
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vse16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vfmacc.vv v30, v26, v28
|
||||
; LMULMAX2-NEXT: vse16.v v30, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-LABEL: fma_v16f16:
|
||||
; LMULMAX1: # %bb.0:
|
||||
; LMULMAX1-NEXT: addi a3, zero, 8
|
||||
; LMULMAX1-NEXT: vsetvli a4, a3, e16,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vsetvli a3, a3, e16,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-NEXT: addi a4, a0, 16
|
||||
; LMULMAX1-NEXT: vle16.v v26, (a4)
|
||||
; LMULMAX1-NEXT: addi a3, a0, 16
|
||||
; LMULMAX1-NEXT: vle16.v v26, (a3)
|
||||
; LMULMAX1-NEXT: vle16.v v27, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a1, 16
|
||||
; LMULMAX1-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a2, 16
|
||||
; LMULMAX1-NEXT: vle16.v v29, (a1)
|
||||
; LMULMAX1-NEXT: vle16.v v30, (a2)
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e16,m1,tu,mu
|
||||
; LMULMAX1-NEXT: vfmadd.vv v26, v28, v29
|
||||
; LMULMAX1-NEXT: vfmadd.vv v25, v27, v30
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e16,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-NEXT: vse16.v v26, (a4)
|
||||
; LMULMAX1-NEXT: vfmacc.vv v29, v26, v28
|
||||
; LMULMAX1-NEXT: vfmacc.vv v30, v25, v27
|
||||
; LMULMAX1-NEXT: vse16.v v30, (a0)
|
||||
; LMULMAX1-NEXT: vse16.v v29, (a3)
|
||||
; LMULMAX1-NEXT: ret
|
||||
%a = load <16 x half>, <16 x half>* %x
|
||||
%b = load <16 x half>, <16 x half>* %y
|
||||
|
@ -1038,35 +1028,31 @@ define void @fma_v8f32(<8 x float>* %x, <8 x float>* %y, <8 x float>* %z) {
|
|||
; LMULMAX2-LABEL: fma_v8f32:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a3, zero, 8
|
||||
; LMULMAX2-NEXT: vsetvli a4, a3, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vsetvli a3, a3, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vle32.v v30, (a2)
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e32,m2,tu,mu
|
||||
; LMULMAX2-NEXT: vfmadd.vv v26, v28, v30
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vse32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vfmacc.vv v30, v26, v28
|
||||
; LMULMAX2-NEXT: vse32.v v30, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-LABEL: fma_v8f32:
|
||||
; LMULMAX1: # %bb.0:
|
||||
; LMULMAX1-NEXT: addi a3, zero, 4
|
||||
; LMULMAX1-NEXT: vsetvli a4, a3, e32,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vsetvli a3, a3, e32,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-NEXT: addi a4, a0, 16
|
||||
; LMULMAX1-NEXT: vle32.v v26, (a4)
|
||||
; LMULMAX1-NEXT: addi a3, a0, 16
|
||||
; LMULMAX1-NEXT: vle32.v v26, (a3)
|
||||
; LMULMAX1-NEXT: vle32.v v27, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a1, 16
|
||||
; LMULMAX1-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a2, 16
|
||||
; LMULMAX1-NEXT: vle32.v v29, (a1)
|
||||
; LMULMAX1-NEXT: vle32.v v30, (a2)
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e32,m1,tu,mu
|
||||
; LMULMAX1-NEXT: vfmadd.vv v26, v28, v29
|
||||
; LMULMAX1-NEXT: vfmadd.vv v25, v27, v30
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e32,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-NEXT: vse32.v v26, (a4)
|
||||
; LMULMAX1-NEXT: vfmacc.vv v29, v26, v28
|
||||
; LMULMAX1-NEXT: vfmacc.vv v30, v25, v27
|
||||
; LMULMAX1-NEXT: vse32.v v30, (a0)
|
||||
; LMULMAX1-NEXT: vse32.v v29, (a3)
|
||||
; LMULMAX1-NEXT: ret
|
||||
%a = load <8 x float>, <8 x float>* %x
|
||||
%b = load <8 x float>, <8 x float>* %y
|
||||
|
@ -1081,35 +1067,31 @@ define void @fma_v4f64(<4 x double>* %x, <4 x double>* %y, <4 x double>* %z) {
|
|||
; LMULMAX2-LABEL: fma_v4f64:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a3, zero, 4
|
||||
; LMULMAX2-NEXT: vsetvli a4, a3, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vsetvli a3, a3, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vle64.v v30, (a2)
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e64,m2,tu,mu
|
||||
; LMULMAX2-NEXT: vfmadd.vv v26, v28, v30
|
||||
; LMULMAX2-NEXT: vsetvli a1, a3, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vse64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vfmacc.vv v30, v26, v28
|
||||
; LMULMAX2-NEXT: vse64.v v30, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-LABEL: fma_v4f64:
|
||||
; LMULMAX1: # %bb.0:
|
||||
; LMULMAX1-NEXT: addi a3, zero, 2
|
||||
; LMULMAX1-NEXT: vsetvli a4, a3, e64,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vsetvli a3, a3, e64,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-NEXT: addi a4, a0, 16
|
||||
; LMULMAX1-NEXT: vle64.v v26, (a4)
|
||||
; LMULMAX1-NEXT: addi a3, a0, 16
|
||||
; LMULMAX1-NEXT: vle64.v v26, (a3)
|
||||
; LMULMAX1-NEXT: vle64.v v27, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a1, 16
|
||||
; LMULMAX1-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a2, 16
|
||||
; LMULMAX1-NEXT: vle64.v v29, (a1)
|
||||
; LMULMAX1-NEXT: vle64.v v30, (a2)
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e64,m1,tu,mu
|
||||
; LMULMAX1-NEXT: vfmadd.vv v26, v28, v29
|
||||
; LMULMAX1-NEXT: vfmadd.vv v25, v27, v30
|
||||
; LMULMAX1-NEXT: vsetvli a1, a3, e64,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-NEXT: vse64.v v26, (a4)
|
||||
; LMULMAX1-NEXT: vfmacc.vv v29, v26, v28
|
||||
; LMULMAX1-NEXT: vfmacc.vv v30, v25, v27
|
||||
; LMULMAX1-NEXT: vse64.v v30, (a0)
|
||||
; LMULMAX1-NEXT: vse64.v v29, (a3)
|
||||
; LMULMAX1-NEXT: ret
|
||||
%a = load <4 x double>, <4 x double>* %x
|
||||
%b = load <4 x double>, <4 x double>* %y
|
||||
|
|
Loading…
Reference in New Issue