forked from OSchip/llvm-project
[RISCV] Fix a vsetvli insertion bug involving loads/stores.
The first phase of the analysis can avoid a vsetvli if an earlier instruction in the block used an SEW and LMUL that when combined with the EEW of the load/store would produce the desired EMUL. If we avoided a vsetvli this will affect the global analysis we do in the second phase. The third phase where we really insert the vsetvlis needs to agree with the first phase. If it doesn't we can insert vsetvlis that invalidate the global analysis. In the test case there is a VSETVLI in the preheader that sets SEW=64 and LMUL=1. Inside the loop there is a VADD with SEW=64 and LMUL=1. This VADD is followed by a store that wants SEW=32 LMUL=1/2. Because it has EEW=32 as part of the opcode the SEW=64 LMUL=1 from the VADD can become EMUL=1 for the store. So the first phase determines no vsetvli is needed. The third phase manages CurInfo differently than BBInfo.Change from the first phase. CurInfo is only updated when we see a vsetvli or insert a vsetvli. This was done to allow predecessor block information from the global analysis to be applied to multiple instructions. Since the loop body has no vsetvli we won't update CurInfo for either the VADD or the VSE. This prevented us from checking the store vsetvli elision for the VSE resulting in a vsetvli SEW=32 LMUL=1/2 being emitted which invalidated the global analysis. To mitigate this, I've added a BBLocalInfo variable that more closely matches the first phase propagation. This gets updated based on the VADD and prevents emitting a vsetvli for the store like we did in the first phase. I wonder if we should do an earlier phase to handle the load/store case by adding more pseudo opcodes and changing the SEW/LMUL for those instructions before the insertion analysis. That might be more robust than trying to guarantee two phases make the same decision. Fixes the test from D118629. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D118667
This commit is contained in:
parent
6b8800dfb5
commit
7eb7810727
|
@ -999,6 +999,11 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
|
|||
|
||||
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
|
||||
VSETVLIInfo CurInfo;
|
||||
// BBLocalInfo tracks the VL/VTYPE state the way BBInfo.Change was calculated in
|
||||
// computeIncomingVLVTYPE. We need this to apply canSkipVSETVLIForLoadStore
|
||||
// the same way computeIncomingVLVTYPE did. We can't include predecessor
|
||||
// information in that decision to avoid disagreeing with the global analysis.
|
||||
VSETVLIInfo BBLocalInfo;
|
||||
// Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
|
||||
MachineInstr *PrevVSETVLIMI = nullptr;
|
||||
|
||||
|
@ -1014,6 +1019,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
|
|||
MI.getOperand(3).setIsDead(false);
|
||||
MI.getOperand(4).setIsDead(false);
|
||||
CurInfo = getInfoForVSETVLI(MI);
|
||||
BBLocalInfo = getInfoForVSETVLI(MI);
|
||||
PrevVSETVLIMI = &MI;
|
||||
continue;
|
||||
}
|
||||
|
@ -1043,12 +1049,22 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
|
|||
// use the predecessor information.
|
||||
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
|
||||
"Expected a valid predecessor state.");
|
||||
if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
|
||||
// Don't use predecessor information if there was an earlier instruction
|
||||
// in this block that allowed a vsetvli to be skipped for load/store.
|
||||
if (!(BBLocalInfo.isValid() &&
|
||||
canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
|
||||
needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
|
||||
needVSETVLIPHI(NewInfo, MBB)) {
|
||||
insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
|
||||
CurInfo = NewInfo;
|
||||
BBLocalInfo = NewInfo;
|
||||
}
|
||||
|
||||
// We must update BBLocalInfo for every vector instruction.
|
||||
if (!BBLocalInfo.isValid())
|
||||
BBLocalInfo = NewInfo;
|
||||
} else {
|
||||
assert(BBLocalInfo.isValid());
|
||||
// If this instruction isn't compatible with the previous VL/VTYPE
|
||||
// we need to insert a VSETVLI.
|
||||
// If this is a unit-stride or strided load/store, we may be able to use
|
||||
|
@ -1084,6 +1100,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
|
|||
if (NeedInsertVSETVLI)
|
||||
insertVSETVLI(MBB, MI, NewInfo, CurInfo);
|
||||
CurInfo = NewInfo;
|
||||
BBLocalInfo = NewInfo;
|
||||
}
|
||||
}
|
||||
PrevVSETVLIMI = nullptr;
|
||||
|
@ -1094,6 +1111,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
|
|||
if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
|
||||
MI.modifiesRegister(RISCV::VTYPE)) {
|
||||
CurInfo = VSETVLIInfo::getUnknown();
|
||||
BBLocalInfo = VSETVLIInfo::getUnknown();
|
||||
PrevVSETVLIMI = nullptr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -567,10 +567,6 @@ body: |
|
|||
; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6, implicit $vl, implicit $vtype
|
||||
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
|
||||
; FIXME: We insert a SEW=32,LMUL=1/2 VSETVLI here but no SEW=64,LMUL=1
|
||||
; VSETVLI before the VADD above. This misconfigures the VADD in the case that
|
||||
; the loop takes its backedge.
|
||||
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 87, implicit-def $vl, implicit-def $vtype, implicit $vl
|
||||
; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5, implicit $vl, implicit $vtype
|
||||
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
|
||||
; CHECK-NEXT: BLTU [[ADDI]], [[COPY1]], %bb.1
|
||||
|
|
Loading…
Reference in New Issue