[RISCV] Teach the RISCVInsertVSETVLI pass to eliminate redundant vsetvli for vmv.s.x and vfmv.s.f.

Differential Revision: https://reviews.llvm.org/D116307
This commit is contained in:
jacquesguan 2021-12-27 21:13:24 +08:00
parent 550d90e692
commit 128c6ed73b
7 changed files with 107 additions and 56 deletions

View File

@ -59,12 +59,13 @@ class VSETVLIInfo {
uint8_t MaskAgnostic : 1;
uint8_t MaskRegOp : 1;
uint8_t StoreOp : 1;
uint8_t ScalarMovOp : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
StoreOp(false), SEWLMULRatioOnly(false) {}
StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@ -96,6 +97,18 @@ public:
assert(hasAVLImm());
return AVLImm;
}
// Returns true only when the AVL is held as an immediate and that immediate
// is zero. Any non-immediate AVL yields false.
bool hasZeroAVL() const {
  return hasAVLImm() && getAVLImm() == 0;
}
// Returns true when the AVL is treated as non-zero: either an immediate
// greater than zero, or a register AVL equal to RISCV::X0. Every other case
// (including an arbitrary register AVL) yields false.
// NOTE(review): X0 as the AVL register presumably requests VLMAX, which is
// why it is counted as non-zero here -- confirm against the vsetvli rules.
bool hasNonZeroAVL() const {
  if (hasAVLImm())
    return getAVLImm() > 0;
  return hasAVLReg() && getAVLReg() == RISCV::X0;
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
@ -120,7 +133,7 @@ public:
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
bool IsStore) {
bool IsStore, bool IsScalarMovOp) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
@ -129,6 +142,7 @@ public:
MaskAgnostic = MA;
MaskRegOp = MRO;
StoreOp = IsStore;
ScalarMovOp = IsScalarMovOp;
}
unsigned encodeVTYPE() const {
@ -139,6 +153,16 @@ public:
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
// Returns true when this state and Other carry the same SEW.
// Asserts that both states are valid, that neither is unknown, and that
// neither tracks only the SEW/LMUL ratio.
bool hasSameSEW(const VSETVLIInfo &Other) const {
  assert(isValid() && Other.isValid() &&
         "Can't compare invalid VSETVLIInfos");
  assert(!isUnknown() && !Other.isUnknown() &&
         "Can't compare VTYPE in unknown state");
  assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
         "Can't compare when only LMUL/SEW ratio is valid.");
  const bool SEWMatches = (Other.SEW == SEW);
  return SEWMatches;
}
bool hasSameVTYPE(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
@ -178,6 +202,15 @@ public:
return getSEWLMULRatio() == Other.getSEWLMULRatio();
}
// Returns true when this state and Other agree on both the tail-agnostic and
// the mask-agnostic flags.
// Asserts that both states are valid and that neither is unknown.
bool hasSamePolicy(const VSETVLIInfo &Other) const {
  assert(isValid() && Other.isValid() &&
         "Can't compare invalid VSETVLIInfos");
  assert(!isUnknown() && !Other.isUnknown() &&
         "Can't compare VTYPE in unknown state");
  // A mismatch in the tail policy already decides the answer.
  if (TailAgnostic != Other.TailAgnostic)
    return false;
  return MaskAgnostic == Other.MaskAgnostic;
}
bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
// Simple case, see if full VTYPE matches.
if (hasSameVTYPE(InstrInfo))
@ -222,6 +255,15 @@ public:
return true;
}
// For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and VL > 0.
// So the states are compatible when we can be sure that both VLs fall into
// the same case.
if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
(hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
return true;
// The AVL must match.
if (!hasSameAVL(InstrInfo))
return false;
@ -414,6 +456,42 @@ static MachineInstr *elideCopies(MachineInstr *MI,
}
}
// Returns true when MI is a pseudo for one of the scalar-to-vector moves,
// vmv.s.x or vfmv.s.f, at any LMUL; false for every other opcode.
//
// The floating-point cases must be the PseudoVFMV_S_F* opcodes (vfmv.s.f,
// scalar -> element 0). The similarly named PseudoVFMV_F*_S_* opcodes are
// vfmv.f.s (element 0 -> scalar), which is not the instruction this
// classification is meant for.
static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // vmv.s.x
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  // vfmv.s.f, half precision
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F16_MF8:
  // vfmv.s.f, single precision
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F32_MF4:
  case RISCV::PseudoVFMV_S_F32_MF8:
  // vfmv.s.f, double precision
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
  case RISCV::PseudoVFMV_S_F64_MF2:
  case RISCV::PseudoVFMV_S_F64_MF4:
  case RISCV::PseudoVFMV_S_F64_MF8:
    return true;
  }
}
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
@ -461,6 +539,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// If there are no explicit defs, this is a store instruction which can
// ignore the tail and mask policies.
bool StoreOp = MI.getNumExplicitDefs() == 0;
bool ScalarMovOp = isScalarMoveInstr(MI);
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
@ -477,7 +556,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
} else
InstrInfo.setAVLReg(RISCV::NoRegister);
InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
/*MaskAgnostic*/ false, MaskRegOp, StoreOp);
/*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);
return InstrInfo;
}
@ -1000,6 +1079,13 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
NeedInsertVSETVLI = false;
}
if (isScalarMoveInstr(MI) &&
((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
(CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
NewInfo.hasSameVLMAX(CurInfo)) {
PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
NeedInsertVSETVLI = false;
}
}
if (NeedInsertVSETVLI)
insertVSETVLI(MBB, MI, NewInfo, CurInfo);

View File

@ -27,12 +27,10 @@ define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
; CHECK-NEXT: vslideup.vi v12, v8, 8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vrgather.vv v8, v20, v16
; CHECK-NEXT: lui a0, 11
; CHECK-NEXT: addiw a0, a0, -1366
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vrgather.vv v8, v20, v16
; CHECK-NEXT: vrgather.vv v8, v12, v18, v0.t
; CHECK-NEXT: ret
entry:

View File

@ -513,13 +513,12 @@ define void @buildvec_seq_v9i8(<9 x i8>* %x) {
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vmv.v.i v8, 2
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vmv.v.i v9, 2
; RV32-NEXT: li a1, 36
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vmerge.vim v8, v8, 3, v0
; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vmerge.vim v8, v9, 3, v0
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;

View File

@ -319,9 +319,7 @@ define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: li a1, 10
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vmv.v.x v8, a0
@ -401,11 +399,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, mu
; CHECK-NEXT: vmv.s.x v11, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@ -439,11 +435,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT: vmv.v.x v11, a0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vrgather.vv v10, v8, v11
; RV32-NEXT: li a0, 66
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vrgather.vv v10, v8, v11
; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
@ -455,11 +449,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vmv.v.x v11, a0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v11
; RV64-NEXT: li a0, 66
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v11
; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
@ -502,11 +494,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vrgather.vv v10, v8, v12
; RV32-NEXT: li a0, 98
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vrgather.vv v10, v8, v12
; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
@ -524,11 +514,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v12
; RV64-NEXT: li a0, 98
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v12
; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret

View File

@ -4114,22 +4114,16 @@ define void @mulhu_v16i16(<16 x i16>* %x) {
; LMULMAX2-RV32-NEXT: vle16.v v10, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 2
; LMULMAX2-RV32-NEXT: addi a1, a1, 289
; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 3
; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 2, v0
; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3
; LMULMAX2-RV32-NEXT: lui a1, 4
; LMULMAX2-RV32-NEXT: addi a1, a1, 64
; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0
; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8
; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0
; LMULMAX2-RV32-NEXT: li a1, 257
; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0
; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI130_0)
; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI130_0)
@ -4153,22 +4147,16 @@ define void @mulhu_v16i16(<16 x i16>* %x) {
; LMULMAX2-RV64-NEXT: vle16.v v10, (a0)
; LMULMAX2-RV64-NEXT: lui a1, 2
; LMULMAX2-RV64-NEXT: addiw a1, a1, 289
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV64-NEXT: vmv.v.i v8, 3
; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 2, v0
; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3
; LMULMAX2-RV64-NEXT: lui a1, 4
; LMULMAX2-RV64-NEXT: addiw a1, a1, 64
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0
; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8
; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0
; LMULMAX2-RV64-NEXT: li a1, 257
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI130_0)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI130_0)
@ -4531,11 +4519,9 @@ define void @mulhs_v16i16(<16 x i16>* %x) {
; LMULMAX2-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 7
; LMULMAX2-RV32-NEXT: addi a1, a1, -1687
; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: lui a1, 5
; LMULMAX2-RV32-NEXT: addi a1, a1, -1755
; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT: lui a1, 1048571
; LMULMAX2-RV32-NEXT: addi a1, a1, 1755
@ -4553,11 +4539,9 @@ define void @mulhs_v16i16(<16 x i16>* %x) {
; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV64-NEXT: lui a1, 7
; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV64-NEXT: lui a1, 5
; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV64-NEXT: lui a1, 1048571
; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755

View File

@ -256,9 +256,8 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64>
; RV64-NEXT: lwu a0, 0(a0)
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu
; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, mu
; RV64-NEXT: vslideup.vi v9, v8, 1
; RV64-NEXT: .LBB5_4: # %else2
; RV64-NEXT: vmv1r.v v8, v9

View File

@ -147,8 +147,7 @@ for.body: ; preds = %entry, %for.body
define <vscale x 1 x i64> @test7(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu
; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, mu
; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
entry:
@ -163,8 +162,7 @@ entry:
define <vscale x 1 x i64> @test8(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli a1, 6, e64, m1, ta, mu
; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu
; CHECK-NEXT: vsetivli a1, 6, e64, m1, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
entry:
@ -178,7 +176,6 @@ define <vscale x 1 x i64> @test9(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu
; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, mu
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
entry: