diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index bed4a705b0c6..88942d42c65c 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -398,9 +398,9 @@ public: } bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; - void generateSWaitCntInstBefore(MachineInstr &MI, + void generateWaitcntInstBefore(MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets); - void updateEventWaitCntAfter(MachineInstr &Inst, + void updateEventWaitcntAfter(MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets); void mergeInputScoreBrackets(MachineBasicBlock &Block); bool isLoopBottom(const MachineLoop *Loop, const MachineBasicBlock *Block); @@ -825,11 +825,11 @@ unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) { /// and if so what the value of each counter is. /// The "score bracket" is bound by the lower bound and upper bound /// scores (*_score_LB and *_score_ub respectively). -void SIInsertWaitcnts::generateSWaitCntInstBefore( +void SIInsertWaitcnts::generateWaitcntInstBefore( MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) { // To emit, or not to emit - that's the question! // Start with an assumption that there is no need to emit. - unsigned int EmitSwaitcnt = 0; + unsigned int EmitWaitcnt = 0; // No need to wait before phi. If a phi-move exists, then the wait should // has been inserted before the move. If a phi-move does not exist, then // wait should be inserted before the real use. The same is true for @@ -850,7 +850,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( ScoreBrackets->clearWaitAtBeginning(); for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { - EmitSwaitcnt |= CNT_MASK(T); + EmitWaitcnt |= CNT_MASK(T); ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T)); } } @@ -860,7 +860,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 || MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC || MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) { - EmitSwaitcnt |= + EmitWaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); } @@ -874,7 +874,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( T = (enum InstCounterType)(T + 1)) { if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) { ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T)); - EmitSwaitcnt |= CNT_MASK(T); + EmitWaitcnt |= CNT_MASK(T); } } } @@ -885,7 +885,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( AMDGPU::SendMsg::ID_GS_DONE)) { if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) { ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); - EmitSwaitcnt |= CNT_MASK(VM_CNT); + EmitWaitcnt |= CNT_MASK(VM_CNT); } } #if 0 // TODO: the following blocks of logic when we have fence. @@ -903,11 +903,11 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( case SCMEM_LDS: if (group_is_multi_wave || context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) { - EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT)); // LDS may have to wait for VM_CNT after buffer load to LDS if (target_info->HasBufferLoadToLDS()) { - EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); } } @@ -915,9 +915,9 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( case SCMEM_GDS: if (group_is_multi_wave || fence_is_global) { - EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT)); } break; @@ -927,9 +927,9 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( case SCMEM_RING: case SCMEM_SCATTER: if (group_is_multi_wave || fence_is_global) { - EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, + EmitWaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); } break; @@ -950,13 +950,13 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) { // Export and GDS are tracked individually, either may trigger a waitcnt // for EXEC. - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK)); } @@ -971,7 +971,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( if (ScoreBrackets->getScoreUB(EXP_CNT) > ScoreBrackets->getScoreLB(EXP_CNT)) { ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT)); - EmitSwaitcnt |= CNT_MASK(EXP_CNT); + EmitWaitcnt |= CNT_MASK(EXP_CNT); } } #endif @@ -989,7 +989,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; // VM_CNT is only relevant to vgpr or LDS. - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT)); } @@ -1001,10 +1001,10 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { if (TRI->isVGPR(MRIA, Op.getReg())) { // VM_CNT is only relevant to vgpr or LDS. - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT)); } - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT)); } } @@ -1023,9 +1023,9 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( if (AS != AMDGPUASI.LOCAL_ADDRESS) continue; unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT)); } } @@ -1036,12 +1036,12 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true); for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { if (TRI->isVGPR(MRIA, Def.getReg())) { - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT)); } - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT)); } } // End of for loop that looks at all dest operands. @@ -1056,11 +1056,11 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( // requiring a WAITCNT beforehand. if (MI.getOpcode() == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier()) { - EmitSwaitcnt |= + EmitWaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT)); - EmitSwaitcnt |= ScoreBrackets->updateByWait( + EmitWaitcnt |= ScoreBrackets->updateByWait( LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT)); } @@ -1077,12 +1077,12 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( // another s_waitcnt inserted right after this if there are non-LGKM // instructions still outstanding. ForceZero = true; - EmitSwaitcnt = true; + EmitWaitcnt = true; } } // Does this operand processing indicate s_wait counter update? - if (EmitSwaitcnt) { + if (EmitWaitcnt) { int CntVal[NUM_INST_CNTS]; bool UseDefaultWaitcntStrategy = true; @@ -1101,7 +1101,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( if (UseDefaultWaitcntStrategy) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { - if (EmitSwaitcnt & CNT_MASK(T)) { + if (EmitWaitcnt & CNT_MASK(T)) { int Delta = ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T); int MaxDelta = ScoreBrackets->getWaitCountMax(T); @@ -1111,7 +1111,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( ScoreBrackets->setScoreLB( T, ScoreBrackets->getScoreUB(T) - MaxDelta); } - EmitSwaitcnt &= ~CNT_MASK(T); + EmitWaitcnt &= ~CNT_MASK(T); } CntVal[T] = Delta; } else { @@ -1123,7 +1123,7 @@ void SIInsertWaitcnts::generateSWaitCntInstBefore( } // If we are not waiting on any counter we can skip the wait altogether. - if (EmitSwaitcnt != 0) { + if (EmitWaitcnt != 0) { MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt(); int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm(); if (!OldWaitcnt || @@ -1235,7 +1235,7 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const { return false; } -void SIInsertWaitcnts::updateEventWaitCntAfter( +void SIInsertWaitcnts::updateEventWaitcntAfter( MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) { // Now look at the instruction opcode. If it is a memory access // instruction, update the upper-bound of the appropriate counter's @@ -1646,9 +1646,9 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, // Generate an s_waitcnt instruction to be placed before // cur_Inst, if needed. - generateSWaitCntInstBefore(Inst, ScoreBrackets); + generateWaitcntInstBefore(Inst, ScoreBrackets); - updateEventWaitCntAfter(Inst, ScoreBrackets); + updateEventWaitcntAfter(Inst, ScoreBrackets); #if 0 // TODO: implement resource type check controlled by options with ub = LB. // If this instruction generates a S_SETVSKIP because it is an