[RISCV] Teach RISCVInsertVSETVLI::needVSETVLI to handle mask register instructions better.

If the VL operand of a mask register instruction comes from an
explicit vsetvli with a different VTYPE, we can still avoid needing
a vsetvli as long as the SEW/LMUL ratio is the same and policy bits
match.

Differential Revision: https://reviews.llvm.org/D112762
This commit is contained in:
Craig Topper 2021-10-28 15:09:09 -07:00
parent 1deccd05ba
commit aefcd59895
2 changed files with 24 additions and 13 deletions

View File

@ -178,6 +178,26 @@ public:
return getSEWLMULRatio() == Other.getSEWLMULRatio(); return getSEWLMULRatio() == Other.getSEWLMULRatio();
} }
// Returns true if the VTYPE requirements described by InstrInfo can be
// satisfied by the VTYPE state held in this object.
//
// An exact VTYPE match (hasSameVTYPE) is always accepted. When Strict is
// set, nothing short of an exact match is accepted. Otherwise a mask
// register operation is also accepted when hasSameVLMAX holds and both
// policy bits (tail agnostic, mask agnostic) are equal.
bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
  // Exact match: compatible regardless of Strict.
  if (hasSameVTYPE(InstrInfo))
    return true;

  // From here on only the relaxed rule for mask register operations can
  // succeed, and it never applies to strict queries.
  if (Strict || !InstrInfo.MaskRegOp)
    return false;

  // A mask register operation only cares about VLMAX.
  // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
  // than "InstrInfo".
  if (!hasSameVLMAX(InstrInfo))
    return false;

  // FIXME: The policy bits can probably be ignored for mask reg operations.
  return TailAgnostic == InstrInfo.TailAgnostic &&
         MaskAgnostic == InstrInfo.MaskAgnostic;
}
// Determine whether the vector instruction's requirements represented by // Determine whether the vector instruction's requirements represented by
// InstrInfo are compatible with the previous vsetvli instruction represented // InstrInfo are compatible with the previous vsetvli instruction represented
// by this. // by this.
@ -206,23 +226,15 @@ public:
if (!hasSameAVL(InstrInfo)) if (!hasSameAVL(InstrInfo))
return false; return false;
// Simple case, see if full VTYPE matches. if (hasCompatibleVTYPE(InstrInfo, Strict))
if (hasSameVTYPE(InstrInfo))
return true; return true;
// Strict matches must ensure a full VTYPE match. // Strict matches must ensure a full VTYPE match.
if (Strict) if (Strict)
return false; return false;
// If this is a mask reg operation, it only cares about VLMAX.
// FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
// than "InstrInfo".
if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
TailAgnostic == InstrInfo.TailAgnostic &&
MaskAgnostic == InstrInfo.MaskAgnostic)
return true;
// Store instructions don't use the policy fields. // Store instructions don't use the policy fields.
// TODO: Move into hasCompatibleVTYPE?
if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW) if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
return true; return true;
@ -564,7 +576,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
// VSETVLI here. // VSETVLI here.
if (!CurInfo.isUnknown() && Require.hasAVLReg() && if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() && Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
Require.hasSameVTYPE(CurInfo)) { CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI || if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 || DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||

View File

@ -77,13 +77,12 @@ entry:
ret <vscale x 1 x i64> %1 ret <vscale x 1 x i64> %1
} }
; FIXME the second vsetvli is unnecessary. ; Make sure we don't insert a vsetvli for the vmand instruction.
define <vscale x 1 x i1> @test5(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %avl) nounwind { define <vscale x 1 x i1> @test5(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %avl) nounwind {
; CHECK-LABEL: test5: ; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu ; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: vmand.mm v0, v8, v0
; CHECK-NEXT: ret ; CHECK-NEXT: ret
entry: entry: