//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600 Machine Scheduler interface
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "R600MachineScheduler.h"
|
2014-06-13 09:32:00 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "R600InstrInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2013-05-24 01:10:37 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2015-02-13 18:01:29 +08:00
|
|
|
#include "llvm/IR/LegacyPassManager.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Pass.h"
|
2013-03-11 16:19:28 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
2013-03-06 02:41:32 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2017-07-12 06:08:28 +08:00
|
|
|
#define DEBUG_TYPE "machine-scheduler"
|
2014-04-22 10:41:26 +08:00
|
|
|
|
2013-03-06 02:41:32 +08:00
|
|
|
/// Bind the strategy to \p dag and reset all per-region scheduling state.
///
/// \param dag the scheduling DAG for the region; it must provide virtual
///            register liveness (asserted), because it is downcast to
///            ScheduleDAGMILive.
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
  DAG = static_cast<ScheduleDAGMILive*>(dag);

  // Cache the target hooks used by the rest of the strategy.
  const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  VLIW5 = !ST.hasCaymanISA();
  MRI = &DAG->MRI;

  // Start outside of any ALU/fetch clause with nothing emitted yet.
  CurInstKind = IDOther;
  CurEmitted = 0;
  // 31 == 0b11111: the low five mask bits are set.
  // NOTE(review): presumably "all slots occupied" so that the first ALU pick
  // opens a fresh slot -- confirm against pickAlu().
  OccupedSlotsMask = 31;

  // Per-kind limit on how much may be emitted before a clause switch is
  // considered (see pickNode()).
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();

  // Running totals feeding the ALU/fetch ratio heuristic in pickNode().
  AluInstCount = 0;
  FetchInstCount = 0;
}
|
|
|
|
|
2013-05-18 00:50:44 +08:00
|
|
|
/// Append every unit of \p QSrc to the end of \p QDst (preserving order) and
/// leave \p QSrc empty.
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  llvm::append_range(QDst, QSrc);
  QSrc.clear();
}
|
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
/// Return how many wavefronts fit when each one needs \p GPRCount registers,
/// computed as 248 / GPRCount (integer division, rounds down).
///
/// NOTE(review): 248 is presumably the number of GPRs available for
/// allocation on this hardware -- confirm against the ISA documentation.
///
/// \param GPRCount registers needed per wavefront; must be non-zero.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert (GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
|
|
|
|
|
2013-03-06 02:41:32 +08:00
|
|
|
/// Choose the next unit to schedule and record its kind in NextInstKind.
///
/// Candidates are tried in the order ALU (only when the clause-switch
/// heuristics below allow it), then FETCH, then "other".
///
/// \param IsTopNode [out] always set to false by this strategy.
/// \returns the picked unit, or nullptr when no queue yields one.
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;

  IsTopNode = false;

  // check if we might want to switch current clause type
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // We use the heuristic provided by AMD Accelerated Parallel Processing
    // OpenCL Programming Guide :
    // The approx. number of WF that allows TEX inst to hide ALU inst is :
    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
    //
    // The divisor is non-zero here because Available[IDFetch] is non-empty.
    // NOTE(review): the operands appear to be integral (size() certainly
    // is), so this division truncates before the conversion to float; a
    // ratio below 1 then becomes 0 and takes the first branch below. Confirm
    // whether the truncation is intended before changing it, since it affects
    // scheduling decisions.
    float ALUFetchRatioEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRatioEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      // 62.5 == 500 / 8, see the formula above.
      unsigned NeededWF = 62.5f / ALUFetchRatioEstimate;
      LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      // We assume the local GPR requirements to be "dominated" by the
      // requirement of the TEX clause (which consumes 128 bits regs) ; ALU
      // inst before and after TEX are indeed likely to consume or generate
      // values from/for the TEX clause.
      // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
      // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
      // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
      // (TODO : use RegisterPressure)
      // If we are going to use too many GPR, we flush Fetch instruction to
      // lower register pressure on 128 bits regs.
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // try to pick ALU
    SU = pickAlu();
    // No regular ALU candidate: fall back to the oldest queued physical
    // register copy, if any.
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      // The ALU limit was reached: restart the emitted count.
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // try to pick FETCH
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // try to pick other
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  LLVM_DEBUG(if (SU) {
    dbgs() << " ** Pick node **\n";
    DAG->dumpNode(*SU);
  } else {
    dbgs() << "NO NODE \n";
    // Dump everything that is still unscheduled to help diagnose the stall.
    for (const SUnit &S : DAG->SUnits) {
      if (!S.isScheduled)
        DAG->dumpNode(S);
    }
  });

  return SU;
}
|
|
|
|
|
|
|
|
/// Update the clause bookkeeping after \p SU has been scheduled.
///
/// Switches the current instruction kind to NextInstKind when they differ,
/// adds the cost of \p SU to CurEmitted, and maintains the ALU / fetch
/// instruction counters.
///
/// \param SU        the unit that was just scheduled.
/// \param IsTopNode unused by this implementation.
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
    // Switching to a non-ALU kind sets the low five slot bits (31 == 0b11111).
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    AluInstCount ++;
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      // Counted as four emitted instructions.
      CurEmitted += 4;
      break;
    case AluDiscarded:
      // Not counted at all.
      break;
    default: {
      // One for the instruction itself, plus one for every operand that is
      // the ALU_LITERAL_X register.
      ++CurEmitted;
      for (const MachineOperand &MO : SU->getInstr()->operands()) {
        if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  // Pending fetches only become available while we are not in a fetch clause;
  // inside one we just count the fetch.
  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  } else
    FetchInstCount++;
}
|
|
|
|
|
2013-06-06 04:27:35 +08:00
|
|
|
static bool
|
|
|
|
isPhysicalRegCopy(MachineInstr *MI) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (MI->getOpcode() != R600::COPY)
|
2013-06-06 04:27:35 +08:00
|
|
|
return false;
|
|
|
|
|
2020-08-21 00:46:16 +08:00
|
|
|
return !MI->getOperand(1).getReg().isVirtual();
|
2013-06-06 04:27:35 +08:00
|
|
|
}
|
|
|
|
|
2013-03-06 02:41:32 +08:00
|
|
|
/// Called when \p SU becomes ready from the top. This strategy does not queue
/// it anywhere; the node is only dumped in debug builds.
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}
|
|
|
|
|
|
|
|
/// Called when \p SU becomes ready from the bottom; files it in the matching
/// queue.
///
/// Physical register copies go to PhysicalRegCopy, "other" instructions go
/// straight to Available[IDOther], and everything else is parked in the
/// Pending queue of its kind.
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // There is no export clause, we can schedule one as soon as it's ready
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}
|
|
|
|
|
2020-08-21 00:46:16 +08:00
|
|
|
bool R600SchedStrategy::regBelongsToClass(Register Reg,
|
2013-03-06 02:41:32 +08:00
|
|
|
const TargetRegisterClass *RC) const {
|
2020-08-21 00:46:16 +08:00
|
|
|
if (!Reg.isVirtual()) {
|
2013-03-06 02:41:32 +08:00
|
|
|
return RC->contains(Reg);
|
|
|
|
} else {
|
|
|
|
return MRI->getRegClass(Reg) == RC;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
|
|
|
|
MachineInstr *MI = SU->getInstr();
|
|
|
|
|
2016-06-30 08:01:54 +08:00
|
|
|
if (TII->isTransOnly(*MI))
|
2013-06-30 03:32:43 +08:00
|
|
|
return AluTrans;
|
|
|
|
|
2016-06-22 09:53:49 +08:00
|
|
|
switch (MI->getOpcode()) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::PRED_X:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluPredX;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::INTERP_PAIR_XY:
|
|
|
|
case R600::INTERP_PAIR_ZW:
|
|
|
|
case R600::INTERP_VEC_LOAD:
|
|
|
|
case R600::DOT_4:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_XYZW;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::COPY:
|
2016-06-22 09:53:49 +08:00
|
|
|
if (MI->getOperand(1).isUndef()) {
|
|
|
|
// MI will become a KILL, don't considers it in scheduling
|
|
|
|
return AluDiscarded;
|
2013-06-28 23:46:59 +08:00
|
|
|
}
|
Fix clang -Wimplicit-fallthrough warnings across llvm, NFC
This patch should not introduce any behavior changes. It consists of
mostly one of two changes:
1. Replacing fall through comments with the LLVM_FALLTHROUGH macro
2. Inserting 'break' before falling through into a case block consisting
of only 'break'.
We were already using this warning with GCC, but its warning behaves
slightly differently. In this patch, the following differences are
relevant:
1. GCC recognizes comments that say "fall through" as annotations, clang
doesn't
2. GCC doesn't warn on "case N: foo(); default: break;", clang does
3. GCC doesn't warn when the case contains a switch, but falls through
the outer case.
I will enable the warning separately in a follow-up patch so that it can
be cleanly reverted if necessary.
Reviewers: alexfh, rsmith, lattner, rtrieu, EricWF, bollu
Differential Revision: https://reviews.llvm.org/D53950
llvm-svn: 345882
2018-11-02 03:54:45 +08:00
|
|
|
break;
|
2016-06-22 09:53:49 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2013-03-06 02:41:32 +08:00
|
|
|
|
2016-06-22 09:53:49 +08:00
|
|
|
// Does the instruction take a whole IG ?
|
|
|
|
// XXX: Is it possible to add a helper function in R600InstrInfo that can
|
|
|
|
// be used here and in R600PacketizerList::isSoloInstruction() ?
|
|
|
|
if(TII->isVector(*MI) ||
|
|
|
|
TII->isCubeOp(MI->getOpcode()) ||
|
|
|
|
TII->isReductionOp(MI->getOpcode()) ||
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
MI->getOpcode() == R600::GROUP_BARRIER) {
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_XYZW;
|
|
|
|
}
|
2013-06-28 23:47:08 +08:00
|
|
|
|
2016-06-22 09:53:49 +08:00
|
|
|
if (TII->isLDSInstr(MI->getOpcode())) {
|
|
|
|
return AluT_X;
|
|
|
|
}
|
2013-03-06 02:41:32 +08:00
|
|
|
|
2016-06-22 09:53:49 +08:00
|
|
|
// Is the result already assigned to a channel ?
|
|
|
|
unsigned DestSubReg = MI->getOperand(0).getSubReg();
|
|
|
|
switch (DestSubReg) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::sub0:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_X;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::sub1:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_Y;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::sub2:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_Z;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::sub3:
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_W;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2013-03-06 02:41:32 +08:00
|
|
|
|
2016-06-22 09:53:49 +08:00
|
|
|
// Is the result already member of a X/Y/Z/W class ?
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DestReg = MI->getOperand(0).getReg();
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
|
|
|
|
regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_X;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_Y;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_Z;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_W;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_XYZW;
|
|
|
|
|
|
|
|
// LDS src registers cannot be used in the Trans slot.
|
2016-06-30 08:01:54 +08:00
|
|
|
if (TII->readsLDSSrcReg(*MI))
|
2016-06-22 09:53:49 +08:00
|
|
|
return AluT_XYZW;
|
|
|
|
|
|
|
|
return AluAny;
|
2013-03-06 02:41:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Map the instruction of \p SU to one of IDFetch, IDAlu or IDOther.
///
/// Texture/vertex cache users are fetches. ALU instructions, plus the opcodes
/// listed in the switch below, are ALU. Everything else is "other".
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  // Opcodes that isALUInstr() did not accept but that are still scheduled as
  // ALU instructions.
  switch (Opcode) {
  case R600::PRED_X:
  case R600::COPY:
  case R600::CONST_COPY:
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
  case R600::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}
|
|
|
|
|
2013-09-05 03:53:46 +08:00
|
|
|
/// Remove and return the last unit of \p Q that can join the instruction
/// group currently being built.
///
/// \p Q is scanned from back to front. A unit is accepted when adding its
/// instruction to InstructionsGroupCandidate still satisfies
/// fitsConstReadLimitations() and, if \p AnyALU is set, the instruction is not
/// vector-only. InstructionsGroupCandidate is left unchanged on return.
///
/// \returns the accepted unit (erased from \p Q), or nullptr if \p Q is empty
///          or no unit qualifies.
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
      It != E; ++It) {
    SUnit *SU = *It;
    // Tentatively add the candidate, test the group, then always undo the
    // addition again.
    InstructionsGroupCandidate.push_back(SU->getInstr());
    const bool Fits =
        TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(*SU->getInstr()));
    InstructionsGroupCandidate.pop_back();
    if (Fits) {
      // (It + 1).base() is the forward iterator that points at *It.
      Q.erase((It + 1).base());
      return SU;
    }
  }
  return nullptr;
}
|
|
|
|
|
|
|
|
void R600SchedStrategy::LoadAlu() {
|
2013-05-18 00:50:44 +08:00
|
|
|
std::vector<SUnit *> &QSrc = Pending[IDAlu];
|
|
|
|
for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
|
|
|
|
AluKind AK = getAluKind(QSrc[i]);
|
|
|
|
AvailableAlus[AK].push_back(QSrc[i]);
|
|
|
|
}
|
|
|
|
QSrc.clear();
|
2013-03-06 02:41:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void R600SchedStrategy::PrepareNextSlot() {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "New Slot\n");
|
2013-03-06 02:41:32 +08:00
|
|
|
assert (OccupedSlotsMask && "Slot wasn't filled");
|
|
|
|
OccupedSlotsMask = 0;
|
2018-07-12 04:59:01 +08:00
|
|
|
// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
|
2013-09-05 03:53:46 +08:00
|
|
|
// OccupedSlotsMask |= 16;
|
2013-03-14 23:50:45 +08:00
|
|
|
InstructionsGroupCandidate.clear();
|
2013-03-06 02:41:32 +08:00
|
|
|
LoadAlu();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Constrain the register class of the destination register of \p MI so
/// that it matches channel \p Slot (0 = X, 1 = Y, 2 = Z, 3 = W).
///
/// Nothing is done when \p MI has no `dst` operand, when the destination
/// register is also read by \p MI, or when \p Slot is not in [0, 3].
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  const int DstOperandNo =
      TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
  if (DstOperandNo == -1)
    return;

  Register Dst = MI->getOperand(DstOperandNo).getReg();

  // PressureRegister crashes if an operand is both defined and used in the
  // same instruction and we try to constrain its register class, so leave
  // such instructions untouched.
  for (const MachineOperand &Op : MI->operands())
    if (Op.isReg() && !Op.isDef() && Op.getReg() == Dst)
      return;

  // Per-channel register classes, indexed by slot number.
  static const TargetRegisterClass *const ChannelClasses[] = {
      &R600::R600_TReg32_XRegClass, &R600::R600_TReg32_YRegClass,
      &R600::R600_TReg32_ZRegClass, &R600::R600_TReg32_WRegClass};

  if (Slot < 4)
    MRI->constrainRegClass(Dst, ChannelClasses[Slot]);
}
|
|
|
|
|
2013-09-05 03:53:46 +08:00
|
|
|
/// Try to find an ALU unit for channel \p Slot (0 = X ... 3 = W).
///
/// The queue of units already tied to that channel is tried first. If it
/// yields nothing, a unit is taken from the AluAny queue and its destination
/// is constrained to the channel through AssignSlot(). \p AnyAlu is
/// forwarded unchanged to PopInst().
///
/// \returns the chosen unit, or nullptr if neither queue provides one.
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};

  if (SUnit *ChannelBound = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu))
    return ChannelBound;

  SUnit *Unbound = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (!Unbound)
    return nullptr;

  AssignSlot(Unbound->getInstr(), Slot);
  return Unbound;
}
|
|
|
|
|
2013-06-08 07:30:34 +08:00
|
|
|
unsigned R600SchedStrategy::AvailablesAluCount() const {
|
|
|
|
return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
|
|
|
|
AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
|
|
|
|
AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
|
2013-06-30 03:32:43 +08:00
|
|
|
AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
|
|
|
|
AvailableAlus[AluPredX].size();
|
2013-03-06 02:41:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Pick the next ALU unit to schedule, or return nullptr when neither the
/// available nor the pending ALU queues contain anything.
///
/// OccupedSlotsMask tracks the slots used in the current instruction group:
/// bits 0-3 are the channels passed to AttemptFillSlot() and bit 4 (value
/// 16) is the trans slot. When no remaining slot can be filled,
/// PrepareNextSlot() opens a new group and the search restarts.
SUnit* R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      // Queues that are only served at the start of a group, in priority
      // order, together with the slot bits they claim:
      //  - AluPredX: bottom-up scheduling, so predX must come first;
      //  - AluDiscarded: flush physical register copies (RA discards them);
      //  - AluT_XYZW: takes the four channel slots.
      struct GroupStarter {
        AluKind Kind;
        int SlotBits;
      };
      static const GroupStarter Starters[] = {
          {AluPredX, 31}, {AluDiscarded, 31}, {AluT_XYZW, 15}};

      for (const GroupStarter &Starter : Starters) {
        std::vector<SUnit *> &Queue = AvailableAlus[Starter.Kind];
        if (Queue.empty())
          continue;
        OccupedSlotsMask |= Starter.SlotBits;
        return PopInst(Queue, false);
      }
    }

    // Trans slot: only considered when VLIW5 is set and the slot is free.
    const bool TransSlotFree = !(OccupedSlotsMask & 16);
    if (TransSlotFree && VLIW5) {
      std::vector<SUnit *> &TransQueue = AvailableAlus[AluTrans];
      if (!TransQueue.empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(TransQueue, false);
      }
      // Otherwise try a W-channel / unassigned unit, excluding vector-only
      // instructions (AnyAlu = true).
      if (SUnit *ForTrans = AttemptFillSlot(3, true)) {
        OccupedSlotsMask |= 16;
        return ForTrans;
      }
    }

    // Fill the remaining free channels, from W (3) down to X (0).
    for (int Chan = 3; Chan >= 0; --Chan) {
      const int ChanBit = 1 << Chan;
      if (OccupedSlotsMask & ChanBit)
        continue;

      SUnit *Picked = AttemptFillSlot(Chan, false);
      if (!Picked)
        continue;

      OccupedSlotsMask |= ChanBit;
      InstructionsGroupCandidate.push_back(Picked->getInstr());
      return Picked;
    }

    // Nothing fits in the current group any more; start a new one.
    PrepareNextSlot();
  }
  return nullptr;
}
|
|
|
|
|
|
|
|
/// Take the last unit from the Available queue \p QID. If that queue is
/// empty it is first refilled from Pending[\p QID] through MoveUnits().
///
/// \returns the removed unit, or nullptr if the queue is still empty after
/// the refill attempt.
SUnit* R600SchedStrategy::pickOther(int QID) {
  std::vector<SUnit *> &Ready = Available[QID];

  if (Ready.empty())
    MoveUnits(Pending[QID], Ready);

  if (Ready.empty())
    return nullptr;

  SUnit *Picked = Ready.back();
  Ready.pop_back();
  return Picked;
}
|