diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 6f1dfdac2c6f..8fbfab9f2d77 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1456,9 +1456,10 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, // ST->partialVCCWritesUpdateVCCZ(). bool RestoreVCCZ = false; if (readsVCCZ(Inst)) { - if (!VCCZCorrect) + if (!VCCZCorrect) { + // Restore vccz if it's not known to be correct already. RestoreVCCZ = true; - else if (ST->hasReadVCCZBug()) { + } else if (ST->hasReadVCCZBug()) { // There is a hardware bug on CI/SI where SMRD instruction may corrupt // vccz bit, so when we detect that an instruction may read from a // corrupt vccz bit, we need to: @@ -1469,6 +1470,8 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, if (ScoreBrackets.getScoreLB(LGKM_CNT) < ScoreBrackets.getScoreUB(LGKM_CNT) && ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) { + // Restore vccz if there's an outstanding smem read, which could + // complete and clobber vccz at any time. RestoreVCCZ = true; } } @@ -1482,13 +1485,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } if (!ST->partialVCCWritesUpdateVCCZ()) { - // Up to gfx9, writes to vcc_lo and vcc_hi don't update vccz. - // Writes to vcc will fix it. if (Inst.definesRegister(AMDGPU::VCC_LO) || - Inst.definesRegister(AMDGPU::VCC_HI)) + Inst.definesRegister(AMDGPU::VCC_HI)) { + // Up to gfx9, writes to vcc_lo and vcc_hi don't update vccz. VCCZCorrect = false; - else if (Inst.definesRegister(AMDGPU::VCC)) + } else if (Inst.definesRegister(AMDGPU::VCC)) { + // Writes to vcc will fix any incorrect value in vccz. VCCZCorrect = true; + } } // Generate an s_waitcnt instruction to be placed before