2016-04-30 08:23:06 +08:00
|
|
|
|
//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
|
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-04-30 08:23:06 +08:00
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// This file implements hazard recognizers for scheduling on GCN processors.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "GCNHazardRecognizer.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
|
#include "AMDGPUSubtarget.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "SIDefines.h"
|
2016-04-30 08:23:06 +08:00
|
|
|
|
#include "SIInstrInfo.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "SIRegisterInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "Utils/AMDGPUBaseInfo.h"
|
|
|
|
|
#include "llvm/ADT/iterator_range.h"
|
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2019-05-04 12:30:57 +08:00
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
2016-04-30 08:23:06 +08:00
|
|
|
|
#include "llvm/CodeGen/ScheduleDAG.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
|
#include "llvm/MC/MCInstrDesc.h"
|
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <cassert>
|
|
|
|
|
#include <limits>
|
|
|
|
|
#include <set>
|
|
|
|
|
#include <vector>
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// Hazard Recognizer Implementation
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
/// Construct a hazard recognizer for \p MF.
///
/// Caches the subtarget, instruction info and register info of the function,
/// sizes the two clause bit vectors to the number of register units, picks the
/// look-ahead window and initializes the scheduling model.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF)
    : IsHazardRecognizerMode(false), CurrCycleInstr(nullptr), MF(MF),
      ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
      TRI(TII.getRegisterInfo()), ClauseUses(TRI.getNumRegUnits()),
      ClauseDefs(TRI.getNumRegUnits()) {
  // A function that uses AGPR0 gets a look-ahead of 18, all others get 5.
  // NOTE(review): presumably the larger window covers the longer MAI-related
  // hazards -- confirm against the hardware documentation.
  const bool UsesAGPR0 = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0);
  MaxLookAhead = UsesAGPR0 ? 18 : 5;

  TSchedModel.init(&ST);
}
|
|
|
|
|
|
|
|
|
|
/// Scheduler entry point: forwards the MachineInstr wrapped by \p SU to the
/// MachineInstr overload.
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  MachineInstr *Instr = SU->getInstr();
  EmitInstruction(Instr);
}
|
|
|
|
|
|
|
|
|
|
/// Record \p MI as the instruction issued in the current cycle. It is only
/// remembered here; AdvanceCycle() is what moves it into EmittedInstrs.
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  this->CurrCycleInstr = MI;
}
|
|
|
|
|
|
2016-10-08 07:42:48 +08:00
|
|
|
|
static bool isDivFMas(unsigned Opcode) {
|
|
|
|
|
return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-15 08:58:14 +08:00
|
|
|
|
static bool isSGetReg(unsigned Opcode) {
|
|
|
|
|
return Opcode == AMDGPU::S_GETREG_B32;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isSSetReg(unsigned Opcode) {
|
2020-09-10 00:21:36 +08:00
|
|
|
|
switch (Opcode) {
|
|
|
|
|
case AMDGPU::S_SETREG_B32:
|
|
|
|
|
case AMDGPU::S_SETREG_B32_mode:
|
|
|
|
|
case AMDGPU::S_SETREG_IMM32_B32:
|
|
|
|
|
case AMDGPU::S_SETREG_IMM32_B32_mode:
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2016-10-15 08:58:14 +08:00
|
|
|
|
}
|
|
|
|
|
|
2016-10-28 07:42:29 +08:00
|
|
|
|
static bool isRWLane(unsigned Opcode) {
|
|
|
|
|
return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-28 07:50:21 +08:00
|
|
|
|
static bool isRFE(unsigned Opcode) {
|
|
|
|
|
return Opcode == AMDGPU::S_RFE_B64;
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-19 02:29:53 +08:00
|
|
|
|
static bool isSMovRel(unsigned Opcode) {
|
2017-03-18 05:36:28 +08:00
|
|
|
|
switch (Opcode) {
|
|
|
|
|
case AMDGPU::S_MOVRELS_B32:
|
|
|
|
|
case AMDGPU::S_MOVRELS_B64:
|
|
|
|
|
case AMDGPU::S_MOVRELD_B32:
|
|
|
|
|
case AMDGPU::S_MOVRELD_B64:
|
|
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2017-02-19 02:29:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-01-16 23:43:53 +08:00
|
|
|
|
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
|
|
|
|
|
const MachineInstr &MI) {
|
|
|
|
|
if (TII.isAlwaysGDS(MI.getOpcode()))
|
|
|
|
|
return true;
|
|
|
|
|
|
2017-11-18 05:35:32 +08:00
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
|
case AMDGPU::S_SENDMSG:
|
|
|
|
|
case AMDGPU::S_SENDMSGHALT:
|
|
|
|
|
case AMDGPU::S_TTRACEDATA:
|
|
|
|
|
return true;
|
2019-01-16 23:43:53 +08:00
|
|
|
|
// These DS opcodes don't support GDS.
|
|
|
|
|
case AMDGPU::DS_NOP:
|
|
|
|
|
case AMDGPU::DS_PERMUTE_B32:
|
|
|
|
|
case AMDGPU::DS_BPERMUTE_B32:
|
|
|
|
|
return false;
|
2017-11-18 05:35:32 +08:00
|
|
|
|
default:
|
2019-01-16 23:43:53 +08:00
|
|
|
|
if (TII.isDS(MI.getOpcode())) {
|
|
|
|
|
int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
|
|
|
|
|
AMDGPU::OpName::gds);
|
|
|
|
|
if (MI.getOperand(GDS).getImm())
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2017-11-18 05:35:32 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-13 01:52:51 +08:00
|
|
|
|
static bool isPermlane(const MachineInstr &MI) {
|
|
|
|
|
unsigned Opcode = MI.getOpcode();
|
|
|
|
|
return Opcode == AMDGPU::V_PERMLANE16_B32 ||
|
|
|
|
|
Opcode == AMDGPU::V_PERMLANEX16_B32;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-28 07:50:21 +08:00
|
|
|
|
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
|
2016-10-15 08:58:14 +08:00
|
|
|
|
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
|
|
|
|
|
AMDGPU::OpName::simm16);
|
|
|
|
|
return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-30 08:23:06 +08:00
|
|
|
|
ScheduleHazardRecognizer::HazardType
|
|
|
|
|
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
|
|
|
|
MachineInstr *MI = SU->getInstr();
|
2020-10-20 07:54:24 +08:00
|
|
|
|
// If we are not in "HazardRecognizerMode" and therefore not being run from
|
|
|
|
|
// the scheduler, track possible stalls from hazards but don't insert noops.
|
|
|
|
|
auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
|
|
|
|
|
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
if (MI->isBundle())
|
|
|
|
|
return NoHazard;
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
2016-05-02 22:48:03 +08:00
|
|
|
|
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
2017-11-18 05:35:32 +08:00
|
|
|
|
// FIXME: Should flat be considered vmem?
|
|
|
|
|
if ((SIInstrInfo::isVMEM(*MI) ||
|
|
|
|
|
SIInstrInfo::isFLAT(*MI))
|
|
|
|
|
&& checkVMEMHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
2019-05-04 12:30:57 +08:00
|
|
|
|
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2019-05-04 12:30:57 +08:00
|
|
|
|
|
2019-06-22 00:30:14 +08:00
|
|
|
|
if (checkFPAtomicToDenormModeHazard(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2019-06-22 00:30:14 +08:00
|
|
|
|
|
2019-05-04 12:30:57 +08:00
|
|
|
|
if (ST.hasNoDataDepHazard())
|
|
|
|
|
return NoHazard;
|
|
|
|
|
|
2016-10-28 07:05:31 +08:00
|
|
|
|
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-28 07:05:31 +08:00
|
|
|
|
|
2016-05-03 00:23:09 +08:00
|
|
|
|
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-05-03 00:23:09 +08:00
|
|
|
|
|
2016-10-08 07:42:48 +08:00
|
|
|
|
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-08 07:42:48 +08:00
|
|
|
|
|
2016-10-28 07:42:29 +08:00
|
|
|
|
if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-28 07:42:29 +08:00
|
|
|
|
|
2016-10-15 08:58:14 +08:00
|
|
|
|
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-15 08:58:14 +08:00
|
|
|
|
|
2016-10-28 04:39:09 +08:00
|
|
|
|
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-28 04:39:09 +08:00
|
|
|
|
|
2016-10-28 07:50:21 +08:00
|
|
|
|
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2016-10-28 07:50:21 +08:00
|
|
|
|
|
2017-11-18 05:35:32 +08:00
|
|
|
|
if (ST.hasReadM0MovRelInterpHazard() &&
|
|
|
|
|
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
|
|
|
|
|
checkReadM0Hazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2017-11-18 05:35:32 +08:00
|
|
|
|
|
2019-01-16 23:43:53 +08:00
|
|
|
|
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
|
2017-02-19 02:29:53 +08:00
|
|
|
|
checkReadM0Hazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2017-02-19 02:29:53 +08:00
|
|
|
|
|
2019-07-12 05:30:34 +08:00
|
|
|
|
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2019-07-12 05:30:34 +08:00
|
|
|
|
|
2020-08-15 06:38:13 +08:00
|
|
|
|
if ((SIInstrInfo::isVMEM(*MI) ||
|
|
|
|
|
SIInstrInfo::isFLAT(*MI) ||
|
|
|
|
|
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2019-07-12 05:30:34 +08:00
|
|
|
|
|
2017-12-08 04:34:25 +08:00
|
|
|
|
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
|
2020-10-20 07:54:24 +08:00
|
|
|
|
return HazardType;
|
2017-12-08 04:34:25 +08:00
|
|
|
|
|
2016-04-30 08:23:06 +08:00
|
|
|
|
return NoHazard;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-29 03:22:23 +08:00
|
|
|
|
/// Insert S_NOP instructions in front of \p MI until \p Quantity wait states
/// have been emitted.
///
/// Each S_NOP is built with the immediate (count - 1) and accounts for at most
/// eight of the requested wait states, so larger quantities are split across
/// several S_NOPs.
static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
                                unsigned Quantity) {
  while (Quantity > 0) {
    const unsigned Chunk = std::min(Quantity, 8u);
    Quantity -= Chunk;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
        .addImm(Chunk - 1);
  }
}
|
|
|
|
|
|
|
|
|
|
void GCNHazardRecognizer::processBundle() {
|
|
|
|
|
MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
|
|
|
|
|
MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
|
|
|
|
|
// Check bundled MachineInstr's for hazards.
|
|
|
|
|
for (; MI != E && MI->isInsideBundle(); ++MI) {
|
|
|
|
|
CurrCycleInstr = &*MI;
|
|
|
|
|
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
|
|
|
|
|
|
2020-10-29 03:22:23 +08:00
|
|
|
|
if (IsHazardRecognizerMode) {
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
fixHazards(CurrCycleInstr);
|
|
|
|
|
|
2020-10-29 03:22:23 +08:00
|
|
|
|
insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
|
|
|
|
|
}
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
|
|
|
|
|
// It’s unnecessary to track more than MaxLookAhead instructions. Since we
|
|
|
|
|
// include the bundled MI directly after, only add a maximum of
|
|
|
|
|
// (MaxLookAhead - 1) noops to EmittedInstrs.
|
|
|
|
|
for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
|
|
|
|
|
EmittedInstrs.push_front(nullptr);
|
|
|
|
|
|
|
|
|
|
EmittedInstrs.push_front(CurrCycleInstr);
|
|
|
|
|
EmittedInstrs.resize(MaxLookAhead);
|
|
|
|
|
}
|
|
|
|
|
CurrCycleInstr = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-30 08:23:06 +08:00
|
|
|
|
/// Compute how many noops must precede \p MI and apply hazard fixes to it.
///
/// Calling this switches the recognizer into hazard-recognizer mode. \p MI is
/// installed as the current instruction for the duration of the call and
/// cleared again before returning.
///
/// \returns the number of wait states reported by PreEmitNoopsCommon().
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;

  CurrCycleInstr = MI;
  // The wait-state count is taken before fixHazards() runs.
  const unsigned NoopsNeeded = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;

  return NoopsNeeded;
}
|
|
|
|
|
|
|
|
|
|
/// Compute the number of wait states required before \p MI.
///
/// Runs the hazard checks that apply to \p MI and keeps the maximum of their
/// results. Several categories (SMRD, inline asm, getreg/setreg, RFE, M0
/// readers, MAI, MAI load/store) end the evaluation as soon as they match.
/// BUNDLE headers always yield 0.
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int Needed = 0;
  // Raise the running maximum to at least \p States.
  auto Require = [&Needed](int States) { Needed = std::max(Needed, States); };

  if (SIInstrInfo::isSMRD(*MI)) {
    Require(checkSMRDHazards(MI));
    return Needed;
  }

  const bool IsVMEMOrFlat =
      SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI);
  if (IsVMEMOrFlat)
    Require(checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    Require(checkNSAtoVMEMHazard(MI));

  Require(checkFPAtomicToDenormModeHazard(MI));

  // None of the remaining checks are run on subtargets that report
  // hasNoDataDepHazard().
  if (ST.hasNoDataDepHazard())
    return Needed;

  const unsigned Opcode = MI->getOpcode();

  if (SIInstrInfo::isVALU(*MI))
    Require(checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    Require(checkDPPHazards(MI));

  if (isDivFMas(Opcode))
    Require(checkDivFMasHazards(MI));

  if (isRWLane(Opcode))
    Require(checkRWLaneHazards(MI));

  // From here on the first matching category decides the result.
  if (MI->isInlineAsm()) {
    Require(checkInlineAsmHazards(MI));
    return Needed;
  }

  if (isSGetReg(Opcode)) {
    Require(checkGetRegHazards(MI));
    return Needed;
  }

  if (isSSetReg(Opcode)) {
    Require(checkSetRegHazards(MI));
    return Needed;
  }

  if (isRFE(Opcode)) {
    Require(checkRFEHazards(MI));
    return Needed;
  }

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(Opcode))) {
    Require(checkReadM0Hazards(MI));
    return Needed;
  }

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) {
    Require(checkReadM0Hazards(MI));
    return Needed;
  }

  if (SIInstrInfo::isMAI(*MI)) {
    Require(checkMAIHazards(MI));
    return Needed;
  }

  if (IsVMEMOrFlat || SIInstrInfo::isDS(*MI)) {
    Require(checkMAILdStHazards(MI));
    return Needed;
  }

  return Needed;
}
|
|
|
|
|
|
|
|
|
|
void GCNHazardRecognizer::EmitNoop() {
|
|
|
|
|
EmittedInstrs.push_front(nullptr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void GCNHazardRecognizer::AdvanceCycle() {
|
|
|
|
|
// When the scheduler detects a stall, it will call AdvanceCycle() without
|
|
|
|
|
// emitting any instructions.
|
2020-10-20 07:54:24 +08:00
|
|
|
|
if (!CurrCycleInstr) {
|
|
|
|
|
EmittedInstrs.push_front(nullptr);
|
2016-04-30 08:23:06 +08:00
|
|
|
|
return;
|
2020-10-20 07:54:24 +08:00
|
|
|
|
}
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
2018-09-10 18:14:48 +08:00
|
|
|
|
// Do not track non-instructions which do not affect the wait states.
|
|
|
|
|
// If included, these instructions can lead to buffer overflow such that
|
|
|
|
|
// detectable hazards are missed.
|
2019-03-05 18:25:16 +08:00
|
|
|
|
if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
|
2020-10-20 07:54:24 +08:00
|
|
|
|
CurrCycleInstr->isKill()) {
|
|
|
|
|
CurrCycleInstr = nullptr;
|
2018-09-10 18:14:48 +08:00
|
|
|
|
return;
|
2020-10-20 07:54:24 +08:00
|
|
|
|
}
|
2018-09-10 18:14:48 +08:00
|
|
|
|
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
if (CurrCycleInstr->isBundle()) {
|
|
|
|
|
processBundle();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-18 05:36:28 +08:00
|
|
|
|
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
|
2016-04-30 08:23:06 +08:00
|
|
|
|
|
|
|
|
|
// Keep track of emitted instructions
|
|
|
|
|
EmittedInstrs.push_front(CurrCycleInstr);
|
|
|
|
|
|
|
|
|
|
// Add a nullptr for each additional wait state after the first. Make sure
|
|
|
|
|
// not to add more than getMaxLookAhead() items to the list, since we
|
|
|
|
|
// truncate the list to that size right after this loop.
|
|
|
|
|
for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
|
|
|
|
|
i < e; ++i) {
|
|
|
|
|
EmittedInstrs.push_front(nullptr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// getMaxLookahead() is the largest number of wait states we will ever need
|
|
|
|
|
// to insert, so there is no point in keeping track of more than that many
|
|
|
|
|
// wait states.
|
|
|
|
|
EmittedInstrs.resize(getMaxLookAhead());
|
|
|
|
|
|
|
|
|
|
CurrCycleInstr = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Bottom-up scheduling hook. This recognizer only works top-down, so reaching
/// this function is a programming error.
void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}
|
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// Helper Functions
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2019-01-22 03:11:26 +08:00
|
|
|
|
/// Callback deciding whether a backwards scan may stop: it receives the
/// instruction just visited (may be null) and the wait states counted so far.
using IsExpiredFn = function_ref<bool(MachineInstr *, int WaitStates)>;
|
|
|
|
|
|
|
|
|
|
// Returns a minimum wait states since \p I walking all predecessors.
|
|
|
|
|
// Only scans until \p IsExpired does not return true.
|
|
|
|
|
// Can only be run in a hazard recognizer mode.
|
|
|
|
|
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
|
|
|
|
|
MachineBasicBlock *MBB,
|
|
|
|
|
MachineBasicBlock::reverse_instr_iterator I,
|
|
|
|
|
int WaitStates,
|
|
|
|
|
IsExpiredFn IsExpired,
|
|
|
|
|
DenseSet<const MachineBasicBlock *> &Visited) {
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
for (auto E = MBB->instr_rend(); I != E; ++I) {
|
|
|
|
|
// Don't add WaitStates for parent BUNDLE instructions.
|
|
|
|
|
if (I->isBundle())
|
|
|
|
|
continue;
|
2019-01-22 03:11:26 +08:00
|
|
|
|
|
|
|
|
|
if (IsHazard(&*I))
|
|
|
|
|
return WaitStates;
|
|
|
|
|
|
2020-09-10 06:08:48 +08:00
|
|
|
|
if (I->isInlineAsm() || I->isMetaInstruction())
|
2019-01-22 03:11:26 +08:00
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
WaitStates += SIInstrInfo::getNumWaitStates(*I);
|
|
|
|
|
|
|
|
|
|
if (IsExpired(&*I, WaitStates))
|
|
|
|
|
return std::numeric_limits<int>::max();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int MinWaitStates = WaitStates;
|
|
|
|
|
bool Found = false;
|
|
|
|
|
for (MachineBasicBlock *Pred : MBB->predecessors()) {
|
|
|
|
|
if (!Visited.insert(Pred).second)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
|
|
|
|
|
WaitStates, IsExpired, Visited);
|
|
|
|
|
|
|
|
|
|
if (W == std::numeric_limits<int>::max())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
|
|
|
|
|
if (IsExpired(nullptr, MinWaitStates))
|
|
|
|
|
return MinWaitStates;
|
|
|
|
|
|
|
|
|
|
Found = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Found)
|
|
|
|
|
return MinWaitStates;
|
|
|
|
|
|
|
|
|
|
return std::numeric_limits<int>::max();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Convenience entry point for the backwards scan: starts at the instruction
/// preceding \p MI in its parent block, with a zero wait-state count and an
/// empty visited set.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  MachineBasicBlock *Block = MI->getParent();
  MachineBasicBlock::reverse_instr_iterator Start =
      std::next(MI->getReverseIterator());
  return getWaitStatesSince(IsHazard, Block, Start, /*WaitStates=*/0,
                            IsExpired, Visited);
}
|
|
|
|
|
|
|
|
|
|
/// \returns the number of wait states since the most recent instruction for
/// which \p IsHazard holds, or std::numeric_limits<int>::max() if none is
/// found within \p Limit wait states.
///
/// In hazard-recognizer mode the function's instructions are walked backwards
/// from CurrCycleInstr. Otherwise the EmittedInstrs history is scanned, where
/// null entries count as one wait state and inline asm counts as none.
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto ReachedLimit = [Limit](MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, ReachedLimit);
  }

  int Elapsed = 0;
  for (MachineInstr *Emitted : EmittedInstrs) {
    if (Emitted && IsHazard(Emitted))
      return Elapsed;

    // Inline asm is checked for hazards above but does not add a wait state.
    if (Emitted && Emitted->isInlineAsm())
      continue;

    ++Elapsed;
    if (Elapsed >= Limit)
      break;
  }

  return std::numeric_limits<int>::max();
}
|
|
|
|
|
|
2019-01-22 03:11:26 +08:00
|
|
|
|
int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
|
|
|
|
|
IsHazardFn IsHazardDef,
|
|
|
|
|
int Limit) {
|
2016-10-28 07:05:31 +08:00
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
|
|
|
|
|
|
auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
|
|
|
|
|
return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
|
|
|
|
|
};
|
|
|
|
|
|
2019-01-22 03:11:26 +08:00
|
|
|
|
return getWaitStatesSince(IsHazardFn, Limit);
|
2016-10-28 07:05:31 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-01-22 03:11:26 +08:00
|
|
|
|
/// \returns the wait states since the most recent S_SETREG-family instruction
/// that also satisfies \p IsHazard, looking back at most \p Limit wait states
/// (see getWaitStatesSince()).
int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  // The opcode test comes first; IsHazard only runs for setreg instructions.
  auto SetRegHazard = [IsHazard](MachineInstr *MI) {
    if (!isSSetReg(MI->getOpcode()))
      return false;
    return IsHazard(MI);
  };

  return getWaitStatesSince(SetRegHazard, Limit);
}
|
|
|
|
|
|
2016-04-30 08:23:06 +08:00
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// No-op Hazard Detection
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2017-11-17 12:18:24 +08:00
|
|
|
|
static void addRegUnits(const SIRegisterInfo &TRI,
|
|
|
|
|
BitVector &BV, unsigned Reg) {
|
|
|
|
|
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
|
|
|
|
|
BV.set(*RUI);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void addRegsToSet(const SIRegisterInfo &TRI,
|
|
|
|
|
iterator_range<MachineInstr::const_mop_iterator> Ops,
|
|
|
|
|
BitVector &Set) {
|
2016-05-03 01:39:06 +08:00
|
|
|
|
for (const MachineOperand &Op : Ops) {
|
|
|
|
|
if (Op.isReg())
|
2017-11-17 12:18:24 +08:00
|
|
|
|
addRegUnits(TRI, Set, Op.getReg());
|
2016-05-03 01:39:06 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-17 12:18:24 +08:00
|
|
|
|
/// Fold the explicit operands of \p MI into the clause bookkeeping: registers
/// from MI.defs() go into ClauseDefs and registers from MI.uses() go into
/// ClauseUses.
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  const auto Defs = MI.defs();
  const auto Uses = MI.uses();
  addRegsToSet(TRI, Defs, ClauseDefs);
  addRegsToSet(TRI, Uses, ClauseUses);
}
|
|
|
|
|
|
2020-05-05 00:41:08 +08:00
|
|
|
|
/// \returns true if \p MI ends an SMEM soft clause, i.e. it is not an SMRD
/// instruction.
static bool breaksSMEMSoftClause(MachineInstr *MI) {
  const bool StaysInClause = SIInstrInfo::isSMRD(*MI);
  return !StaysInClause;
}
|
|
|
|
|
|
|
|
|
|
/// \returns true if \p MI ends a VMEM soft clause, i.e. it is neither a VMEM
/// nor a FLAT instruction.
static bool breaksVMEMSoftClause(MachineInstr *MI) {
  const bool StaysInClause =
      SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI);
  return !StaysInClause;
}
|
|
|
|
|
|
2017-11-18 05:35:32 +08:00
|
|
|
|
/// Decide whether \p MEM may join the soft clause formed by the most recently
/// emitted memory instructions.
///
/// \returns 1 if the clause has to be broken before \p MEM, 0 otherwise.
/// Always 0 when XNACK is disabled.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clause are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  // Pick the clause-terminating predicate that matches the kind of MEM.
  const bool IsSMRD = TII.isSMRD(*MEM);
  bool (*EndsClause)(MachineInstr *) =
      IsSMRD ? breaksSMEMSoftClause : breaksVMEMSoftClause;

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // To handle this correctly, a clause with more than one instruction must
  // not contain an instruction that writes a register read by another
  // instruction of the clause (including itself). If that situation arises,
  // the clause has to be broken by inserting a non-SMEM instruction.
  //
  // Collect the defs and uses of the clause so far. A null entry (wait state)
  // or a non-matching instruction marks the start of the clause, so the walk
  // stops there.
  for (MachineInstr *Emitted : EmittedInstrs) {
    if (!Emitted || EndsClause(Emitted))
      break;
    addClauseInst(*Emitted);
  }

  // Nothing in the clause defines a register: no conflict is possible.
  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  if (ClauseDefs.anyCommon(ClauseUses))
    return 1;
  return 0;
}
|
|
|
|
|
|
2016-04-30 08:23:06 +08:00
|
|
|
|
// Computes the wait states required before the scalar memory read \p SMRD.
//
// The soft-clause check always runs first; its result is what gets returned
// when the subtarget does not have the SMRD-read-after-VALU-def hazard.
// Otherwise every register use of \p SMRD is checked against recent defs and
// the largest requirement found is returned.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  const int SmrdSgprWaitStates = 4;
  const bool IsBufferLoad = TII.isBufferSMRD(*SMRD);

  auto DefinedByVALU = [this](MachineInstr *DefMI) {
    return TII.isVALU(*DefMI);
  };
  auto DefinedBySALU = [this](MachineInstr *DefMI) {
    return TII.isSALU(*DefMI);
  };

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;

    const int SinceVALUDef =
        getWaitStatesSinceDef(Use.getReg(), DefinedByVALU, SmrdSgprWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, SmrdSgprWaitStates - SinceVALUDef);

    if (!IsBufferLoad)
      continue;

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
    // needs some number of nops in between. We don't know how many we need, but
    // let's use 4. This wasn't discovered before probably because the only
    // case when this happens is when we expand a 64-bit pointer into a full
    // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
    // probably never encountered in the closed-source land.
    const int SinceSALUDef =
        getWaitStatesSinceDef(Use.getReg(), DefinedBySALU, SmrdSgprWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, SmrdSgprWaitStates - SinceSALUDef);
  }

  return WaitStatesNeeded;
}
|
|
|
|
|
|
|
|
|
|
// Computes the wait states required before the vector memory instruction
// \p VMEM. Returns 0 immediately on subtargets without the
// VMEM-reads-SGPR-after-VALU-def hazard; note that in that case the
// soft-clause check is not consulted at all.
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int VmemSgprWaitStates = 5;
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  auto DefinedByVALU = [this](MachineInstr *DefMI) {
    return TII.isVALU(*DefMI);
  };

  for (const MachineOperand &Use : VMEM->uses()) {
    // Only non-VGPR register operands are subject to this hazard.
    if (Use.isReg() && !TRI.isVGPR(MRI, Use.getReg())) {
      const int SinceDef = getWaitStatesSinceDef(Use.getReg(), DefinedByVALU,
                                                 VmemSgprWaitStates);
      WaitStatesNeeded =
          std::max(WaitStatesNeeded, VmemSgprWaitStates - SinceDef);
    }
  }

  return WaitStatesNeeded;
}
|
2016-05-03 00:23:09 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before the DPP instruction \p DPP.
//
// Two requirements are combined (the larger one wins):
//  * 2 wait states after a def of any VGPR that \p DPP reads. The predicate
//    used here accepts every defining instruction, not only VALU ones.
//  * 5 wait states after a VALU def of EXEC.
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  const int DppVgprWaitStates = 2;
  const int DppExecWaitStates = 5;

  auto AnyDef = [](MachineInstr *) { return true; };
  auto DefinedByVALU = [TII](MachineInstr *DefMI) {
    return TII->isVALU(*DefMI);
  };

  int WaitStatesNeeded = 0;
  for (const MachineOperand &Use : DPP->uses()) {
    if (Use.isReg() && TRI->isVGPR(MRI, Use.getReg())) {
      const int SinceDef =
          getWaitStatesSinceDef(Use.getReg(), AnyDef, DppVgprWaitStates);
      WaitStatesNeeded =
          std::max(WaitStatesNeeded, DppVgprWaitStates - SinceDef);
    }
  }

  const int SinceExecDef =
      getWaitStatesSinceDef(AMDGPU::EXEC, DefinedByVALU, DppExecWaitStates);
  return std::max(WaitStatesNeeded, DppExecWaitStates - SinceExecDef);
}
|
2016-10-08 07:42:48 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before a v_div_fmas instruction.
// \p DivFMas itself is not inspected; only the distance to the most recent
// VALU def of VCC matters.
int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto DefinedByVALU = [TII](MachineInstr *DefMI) {
    return TII->isVALU(*DefMI);
  };

  const int WaitStatesSinceDef =
      getWaitStatesSinceDef(AMDGPU::VCC, DefinedByVALU, DivFMasWaitStates);
  return DivFMasWaitStates - WaitStatesSinceDef;
}
|
2016-10-15 08:58:14 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before \p GetRegInstr: 2 wait states
// counted from the instruction found by getWaitStatesSinceSetReg whose
// hardware register id (per getHWReg) equals the one \p GetRegInstr uses.
int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const unsigned WantedHWReg = getHWReg(TII, *GetRegInstr);
  const int GetRegWaitStates = 2;

  auto TouchesSameHWReg = [TII, WantedHWReg](MachineInstr *Other) {
    return getHWReg(TII, *Other) == WantedHWReg;
  };

  const int WaitStatesSince =
      getWaitStatesSinceSetReg(TouchesSameHWReg, GetRegWaitStates);
  return GetRegWaitStates - WaitStatesSince;
}
|
2016-10-28 04:39:09 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before \p SetRegInstr. The required
// distance comes from the subtarget (ST.getSetRegWaitStates()) and is counted
// from the instruction found by getWaitStatesSinceSetReg whose hardware
// register id (per getHWReg) equals the one \p SetRegInstr uses.
int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const unsigned WantedHWReg = getHWReg(TII, *SetRegInstr);
  const int SetRegWaitStates = ST.getSetRegWaitStates();

  auto TouchesSameHWReg = [TII, WantedHWReg](MachineInstr *Other) {
    return getHWReg(TII, *Other) == WantedHWReg;
  };

  const int WaitStatesSince =
      getWaitStatesSinceSetReg(TouchesSameHWReg, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesSince;
}
|
2016-10-28 07:05:31 +08:00
|
|
|
|
|
|
|
|
|
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
|
|
|
|
|
if (!MI.mayStore())
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
|
|
|
|
unsigned Opcode = MI.getOpcode();
|
|
|
|
|
const MCInstrDesc &Desc = MI.getDesc();
|
|
|
|
|
|
|
|
|
|
int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
|
|
|
|
|
int VDataRCID = -1;
|
|
|
|
|
if (VDataIdx != -1)
|
|
|
|
|
VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
|
|
|
|
|
|
|
|
|
|
if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
|
2016-11-16 07:55:15 +08:00
|
|
|
|
// There is no hazard if the instruction does not use vector regs
|
|
|
|
|
// (like wbinvl1)
|
|
|
|
|
if (VDataIdx == -1)
|
|
|
|
|
return -1;
|
2016-10-28 07:05:31 +08:00
|
|
|
|
// For MUBUF/MTBUF instructions this hazard only exists if the
|
|
|
|
|
// instruction is not using a register in the soffset field.
|
|
|
|
|
const MachineOperand *SOffset =
|
|
|
|
|
TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
|
|
|
|
|
// If we have no soffset operand, then assume this field has been
|
|
|
|
|
// hardcoded to zero.
|
|
|
|
|
if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
|
|
|
|
|
(!SOffset || !SOffset->isReg()))
|
|
|
|
|
return VDataIdx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MIMG instructions create a hazard if they don't use a 256-bit T# and
|
|
|
|
|
// the store size is greater than 8 bytes and they have more than two bits
|
|
|
|
|
// of their dmask set.
|
|
|
|
|
// All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
|
|
|
|
|
if (TII->isMIMG(MI)) {
|
|
|
|
|
int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
|
|
|
|
|
assert(SRsrcIdx != -1 &&
|
|
|
|
|
AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
|
2016-10-28 07:28:03 +08:00
|
|
|
|
(void)SRsrcIdx;
|
2016-10-28 07:05:31 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (TII->isFLAT(MI)) {
|
2016-11-30 03:30:44 +08:00
|
|
|
|
int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
|
2016-10-28 07:05:31 +08:00
|
|
|
|
if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
|
|
|
|
|
return DataIdx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-30 03:21:28 +08:00
|
|
|
|
int
|
|
|
|
|
GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
|
|
|
|
|
const MachineRegisterInfo &MRI) {
|
2017-12-08 04:34:25 +08:00
|
|
|
|
// Helper to check for the hazard where VMEM instructions that store more than
|
|
|
|
|
// 8 bytes can have there store data over written by the next instruction.
|
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
|
|
|
|
|
|
const int VALUWaitStates = 1;
|
|
|
|
|
int WaitStatesNeeded = 0;
|
|
|
|
|
|
|
|
|
|
if (!TRI->isVGPR(MRI, Def.getReg()))
|
|
|
|
|
return WaitStatesNeeded;
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
|
Register Reg = Def.getReg();
|
2017-12-08 04:34:25 +08:00
|
|
|
|
auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
|
|
|
|
|
int DataIdx = createsVALUHazard(*MI);
|
|
|
|
|
return DataIdx >= 0 &&
|
|
|
|
|
TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
|
|
|
|
|
};
|
|
|
|
|
int WaitStatesNeededForDef =
|
2019-01-22 03:11:26 +08:00
|
|
|
|
VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
|
2017-12-08 04:34:25 +08:00
|
|
|
|
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
|
|
|
|
|
|
|
|
|
|
return WaitStatesNeeded;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-28 07:05:31 +08:00
|
|
|
|
// Computes the wait states required before the VALU instruction \p VALU.
//
// This checks for the hazard where VMEM instructions that store more than
// 8 bytes can have their store data overwritten by the next instruction.
// Returns 0 on subtargets without that hazard; otherwise the maximum of
// checkVALUHazardsHelper over every def of \p VALU.
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  int MaxWaitStates = 0;
  for (const MachineOperand &Def : VALU->defs()) {
    const int ForThisDef = checkVALUHazardsHelper(Def, MRI);
    MaxWaitStates = std::max(MaxWaitStates, ForThisDef);
  }
  return MaxWaitStates;
}
|
|
|
|
|
|
|
|
|
|
// This checks for hazards associated with inline asm statements.
// Since inline asms can contain just about anything, we use this
// to call/leverage other check*Hazard routines. Note that
// this function doesn't attempt to address all possible inline asm
// hazards (good luck), but is a collection of what has been
// problematic thus far.
//
// Currently only the hazard handled by checkVALUHazards() is covered: every
// register def operand of \p IA, starting at InlineAsm::MIOp_FirstOperand, is
// passed to checkVALUHazardsHelper and the largest result is returned.
int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned NumOps = IA->getNumOperands();

  int MaxWaitStates = 0;
  for (unsigned Idx = InlineAsm::MIOp_FirstOperand; Idx != NumOps; ++Idx) {
    const MachineOperand &Op = IA->getOperand(Idx);
    if (!Op.isReg() || !Op.isDef())
      continue;
    MaxWaitStates = std::max(MaxWaitStates, checkVALUHazardsHelper(Op, MRI));
  }
  return MaxWaitStates;
}
|
2016-10-28 07:42:29 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before \p RWLane, based on its src1
// (lane select) operand. Returns 0 unless that operand is an SGPR register;
// otherwise 4 wait states are required after a VALU def of that SGPR.
int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  // A lane select that is not a register, or is a non-SGPR register, cannot
  // trigger this hazard.
  if (!LaneSelectOp->isReg())
    return 0;
  const Register LaneSelectReg = LaneSelectOp->getReg();
  if (!TRI->isSGPRReg(MRI, LaneSelectReg))
    return 0;

  const int RWLaneWaitStates = 4;
  auto DefinedByVALU = [TII](MachineInstr *DefMI) {
    return TII->isVALU(*DefMI);
  };

  const int WaitStatesSince =
      getWaitStatesSinceDef(LaneSelectReg, DefinedByVALU, RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}
|
2016-10-28 07:50:21 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before \p RFE. Returns 0 on subtargets
// without RFE hazards. Otherwise 1 wait state is required, counted from the
// instruction found by getWaitStatesSinceSetReg whose hardware register (per
// getHWReg) is TRAPSTS. \p RFE itself is not inspected.
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const int RFEWaitStates = 1;

  auto TouchesTrapSts = [TII](MachineInstr *Other) {
    return getHWReg(TII, *Other) == AMDGPU::Hwreg::ID_TRAPSTS;
  };

  const int WaitStatesSince =
      getWaitStatesSinceSetReg(TouchesTrapSts, RFEWaitStates);
  return RFEWaitStates - WaitStatesSince;
}
|
2017-02-19 02:29:53 +08:00
|
|
|
|
|
|
|
|
|
// Computes the wait states required before an instruction that reads M0:
// 1 wait state after an SALU def of M0. \p MI itself is not inspected.
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;

  auto DefinedBySALU = [TII](MachineInstr *DefMI) {
    return TII->isSALU(*DefMI);
  };

  const int WaitStatesSince =
      getWaitStatesSinceDef(AMDGPU::M0, DefinedBySALU, SMovRelWaitStates);
  return SMovRelWaitStates - WaitStatesSince;
}
|
2019-05-04 12:30:57 +08:00
|
|
|
|
|
[AMDGPU] Check MI bundles for hazards
Summary: GCNHazardRecognizer fails to identify hazards that are in and around bundles. This patch allows the hazard recognizer to consider bundled instructions in both scheduler and hazard recognizer mode. We ignore “bundledness” for the purpose of detecting hazards and examine the instructions individually.
Reviewers: arsenm, msearles, rampitec
Reviewed By: rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61564
llvm-svn: 360199
2019-05-08 06:12:15 +08:00
|
|
|
|
// Applies every hazard fix-up routine to \p MI.
//
// Each routine decides on its own whether it applies to \p MI; their return
// values are ignored here. The call order is kept fixed because the routines
// may insert instructions in front of \p MI.
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  // VMEM/DS/FLAT read followed by a scalar write.
  fixVMEMtoScalarWriteHazards(MI);
  // VOPC followed by a permlane.
  fixVcmpxPermlaneHazards(MI);
  // SMEM read followed by a vector instruction writing the same SGPR.
  fixSMEMtoVectorWriteHazards(MI);
  // Non-VALU read of EXEC followed by a VALU write of EXEC.
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}
|
|
|
|
|
|
2019-06-13 01:52:51 +08:00
|
|
|
|
// Fixes the hazard between a preceding VOPC instruction and the permlane
// instruction \p MI by inserting a V_MOV_B32 of src0 onto itself in front of
// \p MI.
//
// Returns true if an instruction was inserted, false if the subtarget does not
// have the hazard, \p MI is not a permlane, or the backwards search reports no
// pending hazard.
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard())
    return false;
  if (!isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();

  auto IsVOPC = [TII](MachineInstr *Prev) { return TII->isVOPC(*Prev); };

  // Any VALU instruction other than a V_NOP variant ends the search.
  auto IsRealVALU = [](MachineInstr *Prev, int) {
    if (!Prev || !SIInstrInfo::isVALU(*Prev))
      return false;
    switch (Prev->getOpcode()) {
    case AMDGPU::V_NOP_e32:
    case AMDGPU::V_NOP_e64:
    case AMDGPU::V_NOP_sdwa:
      return false;
    default:
      return true;
    }
  };

  const int WaitStates = ::getWaitStatesSince(IsVOPC, MI, IsRealVALU);
  if (WaitStates == std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
  // which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  const Register SrcReg = Src0->getReg();
  const bool SrcIsUndef = Src0->isUndef();

  // Carry the undef state of src0 over to the inserted move: an undef source
  // yields a dead def and an undef use, otherwise the use is a kill.
  unsigned DefFlags = RegState::Define;
  if (SrcIsUndef)
    DefFlags |= RegState::Dead;
  const unsigned UseFlags = SrcIsUndef ? RegState::Undef : RegState::Kill;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
      .addReg(SrcReg, DefFlags)
      .addReg(SrcReg, UseFlags);

  return true;
}
|
|
|
|
|
|
2019-05-04 12:30:57 +08:00
|
|
|
|
// Fixes the hazard where the SALU/SMRD instruction \p MI writes a register
// that a preceding VMEM, DS or FLAT instruction uses, by inserting
// "S_WAITCNT_DEPCTR 0xffe3" in front of \p MI.
//
// Returns true if an instruction was inserted, false otherwise.
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  // Only scalar instructions with at least one def can trigger the hazard.
  const bool IsScalar = SIInstrInfo::isSALU(*MI) || SIInstrInfo::isSMRD(*MI);
  if (!IsScalar || MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // A VMEM/DS/FLAT instruction that uses any register defined by MI.
  auto UsesDefOfMI = [TRI, MI](MachineInstr *Prev) {
    const bool IsVectorMem = SIInstrInfo::isVMEM(*Prev) ||
                             SIInstrInfo::isDS(*Prev) ||
                             SIInstrInfo::isFLAT(*Prev);
    if (!IsVectorMem)
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      if (Prev->findRegisterUseOperand(Def.getReg(), false, TRI))
        return true;
    }
    return false;
  };

  // The search ends at any VALU instruction, at an S_WAITCNT whose immediate
  // is 0, or at an S_WAITCNT_DEPCTR whose immediate is 0xffe3.
  auto EndsSearch = [](MachineInstr *Prev, int) {
    if (!Prev)
      return false;
    if (SIInstrInfo::isVALU(*Prev))
      return true;
    switch (Prev->getOpcode()) {
    case AMDGPU::S_WAITCNT:
      return Prev->getOperand(0).getImm() == 0;
    case AMDGPU::S_WAITCNT_DEPCTR:
      return Prev->getOperand(0).getImm() == 0xffe3;
    default:
      return false;
    }
  };

  const int WaitStates = ::getWaitStatesSince(UsesDefOfMI, MI, EndsSearch);
  if (WaitStates == std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xffe3);
  return true;
}
|
|
|
|
|
|
|
|
|
|
// Fixes the hazard where the VALU instruction \p MI writes an SGPR that a
// preceding SMEM (SMRD) instruction reads, by inserting
// "S_MOV_B32 SGPR_NULL, 0" in front of \p MI.
//
// Returns true if an instruction was inserted, false otherwise.
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;
  if (!SIInstrInfo::isVALU(*MI))
    return false;

  // The readlane variants use the "vdst" operand name as the scalar
  // destination; everything else uses "sdst".
  unsigned SDSTName = AMDGPU::OpName::sdst;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READLANE_B32_gfx10:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Without an explicit scalar destination, fall back to the first implicit
  // def of a physical SGPR.
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &ImplicitOp : MI->implicit_operands()) {
      if (!ImplicitOp.isDef())
        continue;
      if (!TRI->isSGPRClass(TRI->getPhysRegClass(ImplicitOp.getReg())))
        continue;
      SDST = &ImplicitOp;
      break;
    }
  }

  if (!SDST)
    return false;

  const Register SDSTReg = SDST->getReg();
  auto IsSMEMReadOfSDST = [SDSTReg, TRI](MachineInstr *Prev) {
    return SIInstrInfo::isSMRD(*Prev) && Prev->readsRegister(SDSTReg, TRI);
  };

  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  auto MitigatesHazard = [TII, IV](MachineInstr *Prev, int) {
    // Only SALU instructions can mitigate the hazard.
    if (!Prev || !TII->isSALU(*Prev))
      return false;

    switch (Prev->getOpcode()) {
    case AMDGPU::S_SETVSKIP:
    case AMDGPU::S_VERSION:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
      // These instructions cannot mitigate the hazard.
      return false;
    case AMDGPU::S_WAITCNT_LGKMCNT:
      // Reducing lgkmcnt count to 0 always mitigates the hazard.
      return (Prev->getOperand(1).getImm() == 0) &&
             (Prev->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
    case AMDGPU::S_WAITCNT: {
      const int64_t Imm = Prev->getOperand(0).getImm();
      AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
      return (Decoded.LgkmCnt == 0);
    }
    default:
      // SOPP instructions cannot mitigate the hazard.
      // Any other SALU can be assumed to mitigate the hazard
      // because either:
      // (a) it is independent of the at risk SMEM (breaking chain),
      // or
      // (b) it is dependent on the SMEM, in which case an appropriate
      //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
      //     SMEM instruction.
      return !TII->isSOPP(*Prev);
    }
  };

  const int WaitStates =
      ::getWaitStatesSince(IsSMEMReadOfSDST, MI, MitigatesHazard);
  if (WaitStates == std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
|
|
|
|
|
|
|
|
|
|
/// Mitigate the VCMPX EXEC write-after-read hazard for \p MI.
///
/// Only applies when the subtarget reports hasVcmpxExecWARHazard() and \p MI
/// is a VALU instruction that modifies EXEC. If the search performed by
/// ::getWaitStatesSince finds a non-VALU reader of EXEC before the search is
/// expired, an S_WAITCNT_DEPCTR with immediate 0xfffe is built at \p MI's
/// position in its parent block.
///
/// \returns true if an instruction was inserted, false otherwise.
bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  // The hazard source is any non-VALU instruction that reads EXEC.
  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      // A VALU with an explicit sdst operand expires the search.
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      // So does a VALU that implicitly defines an SGPR. Bind each operand by
      // const reference; there is no need to copy it just to inspect it.
      for (const MachineOperand &MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    // An existing s_waitcnt_depctr whose immediate has all of bits 1..15 set
    // (imm & 0xfffe == 0xfffe) also expires the search.
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  // INT_MAX from the search means no unexpired hazard was found.
  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
    .addImm(0xfffe);

  return true;
}
|
|
|
|
|
|
|
|
|
|
/// Mitigate the LDS / branch / VMEM write-after-read hazard for \p MI.
///
/// Only applies when the subtarget reports hasLdsBranchVmemWARHazard() and
/// \p MI is a DS instruction or a VMEM / segment-specific FLAT instruction.
/// The outer search looks for a branch; for each branch found, a nested
/// search starting at that branch looks for an instruction of the *other*
/// memory kind. When such a pattern is reported, "s_waitcnt_vscnt null, 0" is
/// built at \p MI's position.
///
/// \returns true if an instruction was inserted, false otherwise.
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Classify an instruction: 1 for DS, 2 for VMEM or segment-specific FLAT,
  // 0 for everything else.
  auto ClassifyInst = [](const MachineInstr *Inst) {
    if (SIInstrInfo::isDS(*Inst))
      return 1;
    const bool IsVectorMem = SIInstrInfo::isVMEM(*Inst) ||
                             SIInstrInfo::isSegmentSpecificFLAT(*Inst);
    return IsVectorMem ? 2 : 0;
  };

  const int ThisKind = ClassifyInst(MI);
  if (ThisKind == 0)
    return false;

  // Matches "s_waitcnt_vscnt null, 0".
  auto IsVscntNullZero = [](const MachineInstr *Inst) {
    return Inst->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
           Inst->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
           !Inst->getOperand(1).getImm();
  };

  // The outer search expires at any DS/VMEM/FLAT-segment instruction or at an
  // existing "s_waitcnt_vscnt null, 0".
  auto OuterExpired = [&ClassifyInst, &IsVscntNullZero](MachineInstr *Inst,
                                                       int) {
    if (!Inst)
      return false;
    return ClassifyInst(Inst) != 0 || IsVscntNullZero(Inst);
  };

  // A branch is hazardous when the nested search from it reports an
  // instruction of the other memory kind.
  auto IsHazardousBranch = [ThisKind, &ClassifyInst,
                            &IsVscntNullZero](MachineInstr *Branch) {
    if (!Branch->isBranch())
      return false;

    auto IsOtherKind = [ThisKind, &ClassifyInst](MachineInstr *Inst) {
      const int Kind = ClassifyInst(Inst);
      return Kind != 0 && Kind != ThisKind;
    };

    // The nested search expires at an instruction of the same kind as MI or
    // at an existing "s_waitcnt_vscnt null, 0".
    auto InnerExpired = [ThisKind, &ClassifyInst,
                         &IsVscntNullZero](MachineInstr *Inst, int) {
      if (!Inst)
        return false;
      return ClassifyInst(Inst) == ThisKind || IsVscntNullZero(Inst);
    };

    const int Found = ::getWaitStatesSince(IsOtherKind, Branch, InnerExpired);
    return Found != std::numeric_limits<int>::max();
  };

  // INT_MAX from the search means no unexpired hazard was found.
  const int WaitStates =
      ::getWaitStatesSince(IsHazardousBranch, MI, OuterExpired);
  if (WaitStates == std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}
|
|
|
|
|
|
|
|
|
|
/// Compute the wait states needed between an NSA-encoded MIMG instruction and
/// the buffer instruction \p MI.
///
/// Returns 0 unless the subtarget reports hasNSAtoVMEMBug(), \p MI is a MUBUF
/// or MTBUF instruction, and its named `offset` operand exists with bit 1 or
/// bit 2 set (offset & 6 != 0). Otherwise returns 1 minus the wait states
/// since a MIMG instruction with the Gfx10 NSA encoding whose size is at
/// least 16 bytes.
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  if (!ST.hasNSAtoVMEMBug())
    return 0;

  const bool IsBufferInst =
      SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isMTBUF(*MI);
  if (!IsBufferInst)
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineOperand *OffsetOp =
      TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!OffsetOp)
    return 0;
  if ((OffsetOp->getImm() & 6) == 0)
    return 0;

  // Hazard source: an NSA-encoded MIMG instruction of 16 bytes or more.
  auto IsLargeNSAImage = [TII](MachineInstr *Prev) {
    if (!SIInstrInfo::isMIMG(*Prev))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Prev->getOpcode());
    if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
      return false;
    return TII->getInstSizeInBytes(*Prev) >= 16;
  };

  const int NSAtoVMEMWaitStates = 1;
  return NSAtoVMEMWaitStates -
         getWaitStatesSince(IsLargeNSAImage, NSAtoVMEMWaitStates);
}
|
2019-06-22 00:30:14 +08:00
|
|
|
|
|
|
|
|
|
/// Compute the wait states needed between a floating-point atomic memory
/// instruction and the S_DENORM_MODE instruction \p MI.
///
/// Returns 0 when \p MI is not S_DENORM_MODE. Otherwise returns 3 minus the
/// result of searching for a VMEM or FLAT instruction that is an FP atomic.
/// The search is expired once 3 wait states have accumulated, or by any VALU
/// instruction, or by any of the s_waitcnt variants listed below.
int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  auto IsExpiredFn = [FPAtomicToDenormModeWaitStates] (MachineInstr *MI,
                                                       int WaitStates) {
    // Enough wait states have already elapsed; nothing else matters.
    if (WaitStates >= FPAtomicToDenormModeWaitStates)
      return true;

    // Tolerate a null instruction instead of dereferencing it; with no
    // instruction to inspect, the search is not expired.
    if (!MI)
      return false;

    if (SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
|
2019-07-12 05:30:34 +08:00
|
|
|
|
|
|
|
|
|
/// Compute the wait states required before the MAI instruction \p MI.
///
/// Three groups of checks contribute, and the largest requirement wins:
///  1. For anything other than v_accvgpr_read: VALU writes of EXEC, and VALU
///     writes of any VGPR that \p MI explicitly uses.
///  2. For every explicit AGPR operand: an overlapping MFMA destination, and
///     an overlapping v_accvgpr_write destination.
///  3. For v_accvgpr_write only: an MFMA whose src2 overlaps the destination.
///
/// \returns the number of wait states needed (0 if none).
int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
  assert(SIInstrInfo::isMAI(*MI));

  const unsigned Opc = MI->getOpcode();
  const bool IsAccRead = Opc == AMDGPU::V_ACCVGPR_READ_B32;
  const bool IsAccWrite = Opc == AMDGPU::V_ACCVGPR_WRITE_B32;
  int WaitStatesNeeded = 0;

  auto IsVALUFn = [](MachineInstr *Candidate) {
    return SIInstrInfo::isVALU(*Candidate);
  };

  // Pick a wait-state count from the recorded MFMA latency: a latency of 2
  // selects the 4x4 value, 8 the 16x16 value, and anything else (including
  // 16) the 32x32 value.
  auto SelectByLatency = [](unsigned Latency, int For4x4, int For16x16,
                            int For32x32) {
    if (Latency == 2)
      return For4x4;
    if (Latency == 8)
      return For16x16;
    return For32x32;
  };

  if (!IsAccRead) { // MFMA or v_accvgpr_write
    const int LegacyVALUWritesVGPRWaitStates = 2;
    const int VALUWritesExecWaitStates = 4;
    const int ExecMaxWaitStates = 4;
    const int VGPRMaxWaitStates = 2;

    const int SinceExecDef =
        getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, ExecMaxWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, VALUWritesExecWaitStates - SinceExecDef);

    if (WaitStatesNeeded < ExecMaxWaitStates) {
      for (const MachineOperand &Use : MI->explicit_uses()) {
        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
          continue;

        const int SinceVGPRDef =
            getWaitStatesSinceDef(Use.getReg(), IsVALUFn, VGPRMaxWaitStates);
        WaitStatesNeeded = std::max(
            WaitStatesNeeded, LegacyVALUWritesVGPRWaitStates - SinceVGPRDef);

        // The early exit compares against the per-VGPR limit.
        if (WaitStatesNeeded == VGPRMaxWaitStates)
          break;
      }
    }
  }

  // An MFMA proper: an MAI instruction that is neither accvgpr move.
  auto IsMFMAFn = [](MachineInstr *Candidate) {
    if (!SIInstrInfo::isMAI(*Candidate))
      return false;
    const unsigned CandOpc = Candidate->getOpcode();
    return CandOpc != AMDGPU::V_ACCVGPR_WRITE_B32 &&
           CandOpc != AMDGPU::V_ACCVGPR_READ_B32;
  };

  for (const MachineOperand &Op : MI->explicit_operands()) {
    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
      continue;

    // Definitions are only of interest for v_accvgpr_write.
    if (Op.isDef() && !IsAccWrite)
      continue;

    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
    const int MaxWaitStates = 18;

    Register Reg = Op.getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose destination overlaps Reg without being exactly
    // Reg. The running maximum latency is updated for every MFMA that gets
    // past the exact-match test, before the overlap test is evaluated.
    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency,
                               this](MachineInstr *Def) {
      if (!IsMFMAFn(Def))
        return false;
      Register DstReg = Def->getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
      HazardDefLatency =
          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(Def));
      return TRI.regsOverlap(DstReg, Reg);
    };

    // Run the search first: it fills in HazardDefLatency, which the
    // selection below depends on.
    const int WaitStatesSinceDef =
        getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);

    const int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    const int OpNo = MI->getOperandNo(&Op);
    const bool IsSrcC = OpNo == SrcCIdx;

    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
    if (IsSrcC) {
      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
    } else if (IsAccRead) {
      NeedWaitStates = SelectByLatency(
          HazardDefLatency, MFMA4x4WritesAGPRAccVgprReadWaitStates,
          MFMA16x16WritesAGPRAccVgprReadWaitStates,
          MFMA32x32WritesAGPRAccVgprReadWaitStates);
    } else if (IsAccWrite) {
      NeedWaitStates = SelectByLatency(
          HazardDefLatency, MFMA4x4WritesAGPRAccVgprWriteWaitStates,
          MFMA16x16WritesAGPRAccVgprWriteWaitStates,
          MFMA32x32WritesAGPRAccVgprWriteWaitStates);
    }

    WaitStatesNeeded =
        std::max(WaitStatesNeeded, NeedWaitStates - WaitStatesSinceDef);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Matches a v_accvgpr_write whose destination overlaps Reg.
    auto IsAccVgprWriteFn = [Reg, this](MachineInstr *Def) {
      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
        return false;
      Register DstReg = Def->getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };

    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
    const int AccVGPRWriteAccVgprReadWaitStates = 3;

    int AccWriteNeedWaitStates;
    if (IsSrcC)
      AccWriteNeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
    else if (IsAccRead)
      AccWriteNeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
    else
      AccWriteNeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;

    const int SinceAccWrite =
        getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, AccWriteNeedWaitStates - SinceAccWrite);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
  }

  if (IsAccWrite) {
    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
    const int MaxWaitStates = 13;

    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;

    // Matches an MFMA whose src2 overlaps the register being written. The
    // running maximum latency is updated for every MFMA seen.
    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency,
                         this](MachineInstr *Candidate) {
      if (!IsMFMAFn(Candidate))
        return false;
      Register Reg =
          TII.getNamedOperand(*Candidate, AMDGPU::OpName::src2)->getReg();
      HazardDefLatency = std::max(HazardDefLatency,
                                  TSchedModel.computeInstrLatency(Candidate));
      return TRI.regsOverlap(Reg, DstReg);
    };

    // Search first so HazardDefLatency is populated before selection.
    const int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
    const int NeedWaitStates = SelectByLatency(
        HazardDefLatency, MFMA4x4ReadSrcCAccVgprWriteWaitStates,
        MFMA16x16ReadSrcCAccVgprWriteWaitStates,
        MFMA32x32ReadSrcCAccVgprWriteWaitStates);

    WaitStatesNeeded =
        std::max(WaitStatesNeeded, NeedWaitStates - WaitStatesSince);
  }

  return WaitStatesNeeded;
}
|
|
|
|
|
|
|
|
|
|
/// Compute the wait states required before \p MI because of accvgpr moves
/// feeding its explicit VGPR uses.
///
/// Returns 0 when the subtarget has no MAI instructions. For each explicit
/// VGPR use, two requirements are considered and the largest overall wins:
///  - 2 wait states after a v_accvgpr_read that defines the register;
///  - 1 wait state after a v_accvgpr_read/write for which a search for a
///    non-MAI VALU definition of the register (limit 2) returns something
///    other than INT_MAX.
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
  if (!ST.hasMAIInsts())
    return 0;

  const int AccVgprReadLdStWaitStates = 2;
  const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
  const int MaxWaitStates = 2;

  auto IsAccVgprReadFn = [](MachineInstr *Def) {
    return Def->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
  };

  auto IsNonMAIVALUFn = [](MachineInstr *Def) {
    return SIInstrInfo::isVALU(*Def) && !SIInstrInfo::isMAI(*Def);
  };

  int WaitStatesNeeded = 0;

  for (const MachineOperand &Op : MI->explicit_uses()) {
    if (!Op.isReg())
      continue;

    Register Reg = Op.getReg();
    if (!TRI.isVGPR(MF.getRegInfo(), Reg))
      continue;

    const int SinceAccRead =
        getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, AccVgprReadLdStWaitStates - SinceAccRead);

    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.

    // Matches a v_accvgpr_read/write when the nested search for a non-MAI
    // VALU definition of Reg does not return INT_MAX.
    auto IsVALUAccVgprRdWrCheckFn = [Reg, &IsNonMAIVALUFn,
                                     this](MachineInstr *Candidate) {
      const unsigned CandOpc = Candidate->getOpcode();
      const bool IsAccMove = CandOpc == AMDGPU::V_ACCVGPR_READ_B32 ||
                             CandOpc == AMDGPU::V_ACCVGPR_WRITE_B32;
      if (!IsAccMove)
        return false;
      return getWaitStatesSinceDef(Reg, IsNonMAIVALUFn, 2 /*MaxWaitStates*/) <
             std::numeric_limits<int>::max();
    };

    const int SinceAccMove =
        getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
    WaitStatesNeeded =
        std::max(WaitStatesNeeded,
                 VALUWriteAccVgprRdWrLdStDepVALUWaitStates - SinceAccMove);
  }

  return WaitStatesNeeded;
}
|
2020-07-30 02:47:18 +08:00
|
|
|
|
|
|
|
|
|
/// Decide whether the scheduler should prefer a different unit over \p SU.
///
/// Returns true only when \p SU is an MFMA (an MAI instruction other than
/// v_accvgpr_write / v_accvgpr_read) and the wait states since the MFMA
/// recorded by the search (limit 16) are fewer than that MFMA's latency.
bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
  if (!SU->isInstr())
    return false;

  // The instruction the predicate accepted on its most recent call; reset to
  // null whenever the predicate rejects its argument.
  MachineInstr *LastMFMA = nullptr;
  auto MatchMFMA = [&LastMFMA](MachineInstr *Candidate) {
    const bool IsMFMA =
        SIInstrInfo::isMAI(*Candidate) &&
        Candidate->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
        Candidate->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
    LastMFMA = IsMFMA ? Candidate : nullptr;
    return IsMFMA;
  };

  if (!MatchMFMA(SU->getInstr()))
    return false;

  const int WaitStates = getWaitStatesSince(MatchMFMA, 16);
  if (!LastMFMA)
    return false;

  return WaitStates <
         static_cast<int>(TSchedModel.computeInstrLatency(LastMFMA));
}
|