2012-05-18 06:37:09 +08:00
|
|
|
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
|
2012-01-13 14:30:30 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-01-13 14:30:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// MachineScheduler schedules machine instructions after phi elimination. It
|
|
|
|
// preserves LiveIntervals so it can be invoked before register allocation.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/CodeGen/MachineScheduler.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/BitVector.h"
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/PriorityQueue.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2020-08-07 15:09:48 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/ADT/iterator_range.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/LiveInterval.h"
|
2017-12-13 10:51:04 +08:00
|
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2013-03-10 21:11:23 +08:00
|
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2013-03-10 21:11:23 +08:00
|
|
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
|
|
#include "llvm/CodeGen/MachinePassRegistry.h"
|
2013-06-22 02:32:58 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2012-01-13 14:30:30 +08:00
|
|
|
#include "llvm/CodeGen/Passes.h"
|
2012-06-07 04:29:31 +08:00
|
|
|
#include "llvm/CodeGen/RegisterClassInfo.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/CodeGen/RegisterPressure.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/ScheduleDAG.h"
|
|
|
|
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
|
|
|
#include "llvm/CodeGen/ScheduleDAGMutation.h"
|
2012-11-28 13:13:24 +08:00
|
|
|
#include "llvm/CodeGen/ScheduleDFS.h"
|
2012-05-25 06:11:09 +08:00
|
|
|
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/SlotIndexes.h"
|
2018-11-30 04:03:19 +08:00
|
|
|
#include "llvm/CodeGen/TargetFrameLowering.h"
|
2017-11-08 09:01:31 +08:00
|
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
2016-05-10 11:21:59 +08:00
|
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/CodeGen/TargetSchedule.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
2018-04-30 22:59:11 +08:00
|
|
|
#include "llvm/Config/llvm-config.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/MC/LaneBitmask.h"
|
|
|
|
#include "llvm/Pass.h"
|
2012-01-13 14:30:30 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include "llvm/Support/Compiler.h"
|
2012-01-13 14:30:30 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2013-01-25 15:45:29 +08:00
|
|
|
#include "llvm/Support/GraphWriter.h"
|
2018-03-24 07:58:25 +08:00
|
|
|
#include "llvm/Support/MachineValueType.h"
|
2012-01-13 14:30:30 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-02-23 06:32:51 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <iterator>
|
|
|
|
#include <limits>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <tuple>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
2012-01-17 14:55:07 +08:00
|
|
|
|
2012-01-13 14:30:30 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2017-05-26 05:26:32 +08:00
|
|
|
#define DEBUG_TYPE "machine-scheduler"
|
2014-04-22 10:02:50 +08:00
|
|
|
|
2020-08-07 15:09:48 +08:00
|
|
|
STATISTIC(NumClustered, "Number of load/store pairs clustered");
|
|
|
|
|
2012-09-11 08:39:15 +08:00
|
|
|
namespace llvm {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-09-11 08:39:15 +08:00
|
|
|
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
|
|
|
|
cl::desc("Force top-down list scheduling"));
|
|
|
|
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
|
|
|
|
cl::desc("Force bottom-up list scheduling"));
|
2014-08-08 05:49:44 +08:00
|
|
|
cl::opt<bool>
|
|
|
|
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
|
|
|
|
cl::desc("Print critical path length to stdout"));
|
2017-02-23 06:32:51 +08:00
|
|
|
|
[AMDGPU] Add VerifyScheduling support.
Summary:
This is cut and pasted from the corresponding GenericScheduler
functions.
Reviewers: arsenm, atrick, tstellar, vpykhtin
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68264
llvm-svn: 373346
2019-10-01 23:45:47 +08:00
|
|
|
cl::opt<bool> VerifyScheduling(
|
|
|
|
"verify-misched", cl::Hidden,
|
|
|
|
cl::desc("Verify machine instrs before and after machine scheduling"));
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
} // end namespace llvm
|
2012-03-14 12:00:41 +08:00
|
|
|
|
2012-03-07 08:18:25 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
|
|
|
|
cl::desc("Pop up a window to show MISched dags after they are processed"));
|
2012-03-20 02:38:38 +08:00
|
|
|
|
2015-09-18 05:09:59 +08:00
|
|
|
/// In some situations a few uninteresting nodes depend on nearly all other
|
|
|
|
/// nodes in the graph, provide a cutoff to hide them.
|
|
|
|
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
|
|
|
|
cl::desc("Hide nodes with more predecessor/successor than cutoff"));
|
|
|
|
|
2012-03-20 02:38:38 +08:00
|
|
|
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
|
|
|
|
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
|
2013-12-29 05:57:02 +08:00
|
|
|
|
|
|
|
static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
|
|
|
|
cl::desc("Only schedule this function"));
|
|
|
|
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
|
2017-12-05 01:18:51 +08:00
|
|
|
cl::desc("Only schedule this MBB#"));
|
2018-09-20 04:50:49 +08:00
|
|
|
static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
|
|
|
|
cl::desc("Print schedule DAGs"));
|
2012-03-07 08:18:25 +08:00
|
|
|
#else
|
2018-09-20 04:50:49 +08:00
|
|
|
static const bool ViewMISchedDAGs = false;
|
|
|
|
static const bool PrintDAGs = false;
|
2012-03-07 08:18:25 +08:00
|
|
|
#endif // NDEBUG
|
|
|
|
|
2016-04-23 03:09:17 +08:00
|
|
|
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
|
|
|
|
/// size of the ready lists.
|
|
|
|
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
|
|
|
|
cl::desc("Limit ready list to N instructions"), cl::init(256));
|
|
|
|
|
2013-09-05 04:59:59 +08:00
|
|
|
static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
|
|
|
|
cl::desc("Enable register pressure scheduling."), cl::init(true));
|
|
|
|
|
2013-08-24 01:48:43 +08:00
|
|
|
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
|
2013-09-10 07:31:14 +08:00
|
|
|
cl::desc("Enable cyclic critical path analysis."), cl::init(true));
|
2013-08-24 01:48:43 +08:00
|
|
|
|
2016-04-15 22:58:38 +08:00
|
|
|
static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
|
|
|
|
cl::desc("Enable memop clustering."),
|
|
|
|
cl::init(true));
|
2012-11-13 03:40:10 +08:00
|
|
|
|
2013-01-25 12:01:04 +08:00
|
|
|
// DAG subtrees must have at least this many nodes.
|
|
|
|
static const unsigned MinSubtreeSize = 8;
|
|
|
|
|
2013-11-19 08:57:56 +08:00
|
|
|
// Pin the vtables to this file.
|
|
|
|
// Out-of-line definition of MachineSchedStrategy::anchor(); the body is
// intentionally empty.
void MachineSchedStrategy::anchor() {}
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2013-11-19 08:57:56 +08:00
|
|
|
// Out-of-line definition of ScheduleDAGMutation::anchor(); the body is
// intentionally empty.
void ScheduleDAGMutation::anchor() {}
|
|
|
|
|
2012-01-14 10:17:06 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Machine Instruction Scheduling Pass and Registry
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
/// Construct the context and heap-allocate the RegisterClassInfo that it
/// owns; the destructor releases it.
MachineSchedContext::MachineSchedContext()
    : RegClassInfo(new RegisterClassInfo()) {}
|
|
|
|
|
|
|
|
/// Release the RegisterClassInfo allocated by the constructor.
MachineSchedContext::~MachineSchedContext() { delete RegClassInfo; }
|
|
|
|
|
2012-01-13 14:30:30 +08:00
|
|
|
namespace {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
/// Base class for a machine scheduler class that can run at any point.
|
|
|
|
class MachineSchedulerBase : public MachineSchedContext,
|
|
|
|
public MachineFunctionPass {
|
|
|
|
public:
|
|
|
|
MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
|
|
|
|
|
2014-04-14 08:51:57 +08:00
|
|
|
void print(raw_ostream &O, const Module* = nullptr) const override;
|
2013-12-29 05:56:47 +08:00
|
|
|
|
|
|
|
protected:
|
2015-11-03 09:53:29 +08:00
|
|
|
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
|
2013-12-29 05:56:47 +08:00
|
|
|
};
|
|
|
|
|
2012-01-17 14:55:03 +08:00
|
|
|
/// MachineScheduler runs after coalescing and before register allocation.
|
2013-12-29 05:56:47 +08:00
|
|
|
class MachineScheduler : public MachineSchedulerBase {
|
2012-01-13 14:30:30 +08:00
|
|
|
public:
|
2012-01-17 14:55:03 +08:00
|
|
|
MachineScheduler();
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction&) override;
|
2012-01-13 14:30:30 +08:00
|
|
|
|
|
|
|
static char ID; // Class identification, replacement for typeinfo
|
2013-09-20 13:14:41 +08:00
|
|
|
|
|
|
|
protected:
|
|
|
|
ScheduleDAGInstrs *createMachineScheduler();
|
2012-01-13 14:30:30 +08:00
|
|
|
};
|
2013-12-29 05:56:51 +08:00
|
|
|
|
|
|
|
/// PostMachineScheduler runs after shortly before code emission.
|
|
|
|
class PostMachineScheduler : public MachineSchedulerBase {
|
|
|
|
public:
|
|
|
|
PostMachineScheduler();
|
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
2013-12-29 05:56:51 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction&) override;
|
2013-12-29 05:56:51 +08:00
|
|
|
|
|
|
|
static char ID; // Class identification, replacement for typeinfo
|
|
|
|
|
|
|
|
protected:
|
|
|
|
ScheduleDAGInstrs *createPostMachineScheduler();
|
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end anonymous namespace
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2012-01-17 14:55:03 +08:00
|
|
|
char MachineScheduler::ID = 0;
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2012-01-17 14:55:03 +08:00
|
|
|
char &llvm::MachineSchedulerID = MachineScheduler::ID;
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2017-05-26 05:26:32 +08:00
|
|
|
// Register MachineScheduler under DEBUG_TYPE, listing the passes it depends
// on between the BEGIN/END macros.
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)
|
|
|
|
|
2017-09-12 07:00:48 +08:00
|
|
|
/// Construct the pass with its ID and run its initializer against the global
/// pass registry.
MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  PassRegistry &PR = *PassRegistry::getPassRegistry();
  initializeMachineSchedulerPass(PR);
}
|
|
|
|
|
2012-01-17 14:55:03 +08:00
|
|
|
/// Declare the analysis requirements of the pre-RA scheduler: the CFG is
/// preserved; dominators, loop info, alias analysis, the target pass config,
/// slot indexes and live intervals are required; slot indexes and live
/// intervals are additionally preserved.
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();

  // Analyses that are only consumed.
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();

  // Analyses that are consumed and also marked as preserved.
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();

  // Let the base class add its own requirements.
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
|
2013-12-29 05:56:51 +08:00
|
|
|
char PostMachineScheduler::ID = 0;
|
|
|
|
|
|
|
|
char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
|
|
|
|
|
|
|
|
INITIALIZE_PASS(PostMachineScheduler, "postmisched",
|
2013-12-29 06:47:55 +08:00
|
|
|
"PostRA Machine Instruction Scheduler", false, false)
|
2013-12-29 05:56:51 +08:00
|
|
|
|
2017-09-12 07:00:48 +08:00
|
|
|
/// Construct the pass with its ID and run its initializer against the global
/// pass registry.
PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  PassRegistry &PR = *PassRegistry::getPassRegistry();
  initializePostMachineSchedulerPass(PR);
}
|
|
|
|
|
|
|
|
/// Declare the analysis requirements of the post-RA scheduler: the CFG is
/// preserved; dominators, loop info, alias analysis and the target pass
/// config are required.
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();

  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();

  // Let the base class add its own requirements.
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
|
2018-11-10 01:19:45 +08:00
|
|
|
// Out-of-line definition of the static registry member, keyed by the
// scheduler constructor type.
MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
    MachineSchedRegistry::Registry;
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2012-03-09 08:52:20 +08:00
|
|
|
/// A dummy default scheduler factory indicates whether the scheduler
|
|
|
|
/// is overridden on the command line.
|
|
|
|
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
|
2014-04-14 08:51:57 +08:00
|
|
|
return nullptr;
|
2012-03-09 08:52:20 +08:00
|
|
|
}
|
2012-01-13 14:30:30 +08:00
|
|
|
|
|
|
|
/// MachineSchedOpt allows command line selection of the scheduler.
|
|
|
|
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
|
2017-02-23 06:32:51 +08:00
|
|
|
RegisterPassParser<MachineSchedRegistry>>
|
2012-01-13 14:30:30 +08:00
|
|
|
MachineSchedOpt("misched",
|
2012-03-09 08:52:20 +08:00
|
|
|
cl::init(&useDefaultMachineSched), cl::Hidden,
|
2012-01-13 14:30:30 +08:00
|
|
|
cl::desc("Machine instruction scheduler to use"));
|
|
|
|
|
2012-03-09 08:52:20 +08:00
|
|
|
static MachineSchedRegistry
|
2012-03-14 12:00:41 +08:00
|
|
|
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
|
2012-03-09 08:52:20 +08:00
|
|
|
useDefaultMachineSched);
|
|
|
|
|
2015-03-12 06:56:10 +08:00
|
|
|
static cl::opt<bool> EnableMachineSched(
|
|
|
|
"enable-misched",
|
|
|
|
cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
|
|
|
|
cl::Hidden);
|
|
|
|
|
2016-01-21 07:08:32 +08:00
|
|
|
static cl::opt<bool> EnablePostRAMachineSched(
|
|
|
|
"enable-post-misched",
|
|
|
|
cl::desc("Enable the post-ra machine instruction scheduling pass."),
|
|
|
|
cl::init(true), cl::Hidden);
|
|
|
|
|
2012-04-25 02:04:34 +08:00
|
|
|
/// Decrement this iterator until reaching the top or a non-debug instr.
|
2013-08-30 12:36:57 +08:00
|
|
|
static MachineBasicBlock::const_iterator
|
|
|
|
priorNonDebug(MachineBasicBlock::const_iterator I,
|
|
|
|
MachineBasicBlock::const_iterator Beg) {
|
2012-04-25 02:04:34 +08:00
|
|
|
assert(I != Beg && "reached the top of the region, cannot decrement");
|
|
|
|
while (--I != Beg) {
|
2018-05-09 10:42:00 +08:00
|
|
|
if (!I->isDebugInstr())
|
2012-04-25 02:04:34 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return I;
|
|
|
|
}
|
|
|
|
|
2013-08-30 12:36:57 +08:00
|
|
|
/// Non-const version.
|
|
|
|
static MachineBasicBlock::iterator
|
|
|
|
priorNonDebug(MachineBasicBlock::iterator I,
|
|
|
|
MachineBasicBlock::const_iterator Beg) {
|
2016-08-17 07:34:07 +08:00
|
|
|
return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
|
|
|
|
.getNonConstIterator();
|
2013-08-30 12:36:57 +08:00
|
|
|
}
|
|
|
|
|
2012-04-25 02:04:34 +08:00
|
|
|
/// If this iterator is a debug value, increment until reaching the End or a
|
|
|
|
/// non-debug instruction.
|
2013-08-31 13:17:58 +08:00
|
|
|
static MachineBasicBlock::const_iterator
|
|
|
|
nextIfDebug(MachineBasicBlock::const_iterator I,
|
|
|
|
MachineBasicBlock::const_iterator End) {
|
2012-05-18 02:35:03 +08:00
|
|
|
for(; I != End; ++I) {
|
2018-05-09 10:42:00 +08:00
|
|
|
if (!I->isDebugInstr())
|
2012-04-25 02:04:34 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return I;
|
|
|
|
}
|
|
|
|
|
2013-08-31 13:17:58 +08:00
|
|
|
/// Non-const version.
|
|
|
|
static MachineBasicBlock::iterator
|
|
|
|
nextIfDebug(MachineBasicBlock::iterator I,
|
|
|
|
MachineBasicBlock::const_iterator End) {
|
2016-08-17 07:34:07 +08:00
|
|
|
return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
|
|
|
|
.getNonConstIterator();
|
2013-08-31 13:17:58 +08:00
|
|
|
}
|
|
|
|
|
2013-09-25 01:11:19 +08:00
|
|
|
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
|
2013-09-20 13:14:41 +08:00
|
|
|
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
|
|
|
|
// Select the scheduler, or set the default.
|
|
|
|
MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
|
|
|
|
if (Ctor != useDefaultMachineSched)
|
|
|
|
return Ctor(this);
|
|
|
|
|
|
|
|
// Get the default scheduler set by the target for this function.
|
|
|
|
ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
|
|
|
|
if (Scheduler)
|
|
|
|
return Scheduler;
|
|
|
|
|
|
|
|
// Default to GenericScheduler.
|
2013-12-29 05:56:57 +08:00
|
|
|
return createGenericSchedLive(this);
|
2013-09-20 13:14:41 +08:00
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:51 +08:00
|
|
|
/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
|
|
|
|
/// the caller. We don't have a command line option to override the postRA
|
|
|
|
/// scheduler. The Target must configure it.
|
|
|
|
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
|
|
|
|
// Get the postRA scheduler set by the target for this function.
|
|
|
|
ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
|
|
|
|
if (Scheduler)
|
|
|
|
return Scheduler;
|
|
|
|
|
|
|
|
// Default to GenericScheduler.
|
2013-12-29 05:56:57 +08:00
|
|
|
return createGenericSchedPostRA(this);
|
2013-12-29 05:56:51 +08:00
|
|
|
}
|
|
|
|
|
2012-03-14 12:00:38 +08:00
|
|
|
/// Top-level MachineScheduler pass driver.
|
|
|
|
///
|
|
|
|
/// Visit blocks in function order. Divide each block into scheduling regions
|
2012-03-14 12:00:41 +08:00
|
|
|
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
|
|
|
|
/// consistent with the DAG builder, which traverses the interior of the
|
|
|
|
/// scheduling regions bottom-up.
|
2012-03-14 12:00:38 +08:00
|
|
|
///
|
|
|
|
/// This design avoids exposing scheduling boundaries to the DAG builder,
|
2012-03-14 12:00:41 +08:00
|
|
|
/// simplifying the DAG builder's support for "special" target instructions.
|
|
|
|
/// At the same time the design allows target schedulers to operate across
|
2018-06-20 13:29:26 +08:00
|
|
|
/// scheduling boundaries, for example to bundle the boundary instructions
|
2012-03-14 12:00:38 +08:00
|
|
|
/// without reordering them. This creates complexity, because the target
|
|
|
|
/// scheduler must update the RegionBegin and RegionEnd positions cached by
|
|
|
|
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
|
|
|
|
/// design would be to split blocks at scheduling boundaries, but LLVM has a
|
|
|
|
/// general bias against block splitting purely for implementation simplicity.
|
2012-03-08 09:41:12 +08:00
|
|
|
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
|
2017-12-16 06:22:58 +08:00
|
|
|
if (skipFunction(mf.getFunction()))
|
2016-01-21 06:38:25 +08:00
|
|
|
return false;
|
|
|
|
|
2015-03-12 06:56:10 +08:00
|
|
|
if (EnableMachineSched.getNumOccurrences()) {
|
|
|
|
if (!EnableMachineSched)
|
|
|
|
return false;
|
|
|
|
} else if (!mf.getSubtarget().enableMachineScheduler())
|
|
|
|
return false;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
|
2012-05-11 05:06:21 +08:00
|
|
|
|
2012-03-08 09:41:12 +08:00
|
|
|
// Initialize the context of the pass.
|
|
|
|
MF = &mf;
|
|
|
|
MLI = &getAnalysis<MachineLoopInfo>();
|
|
|
|
MDT = &getAnalysis<MachineDominatorTree>();
|
2012-03-09 08:52:20 +08:00
|
|
|
PassConfig = &getAnalysis<TargetPassConfig>();
|
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible
with the new pass manager, and no longer relying on analysis groups.
This builds essentially a ground-up new AA infrastructure stack for
LLVM. The core ideas are the same that are used throughout the new pass
manager: type erased polymorphism and direct composition. The design is
as follows:
- FunctionAAResults is a type-erasing alias analysis results aggregation
interface to walk a single query across a range of results from
different alias analyses. Currently this is function-specific as we
always assume that aliasing queries are *within* a function.
- AAResultBase is a CRTP utility providing stub implementations of
various parts of the alias analysis result concept, notably in several
cases in terms of other more general parts of the interface. This can
be used to implement only a narrow part of the interface rather than
the entire interface. This isn't really ideal, this logic should be
hoisted into FunctionAAResults as currently it will cause
a significant amount of redundant work, but it faithfully models the
behavior of the prior infrastructure.
- All the alias analysis passes are ported to be wrapper passes for the
legacy PM and new-style analysis passes for the new PM with a shared
result object. In some cases (most notably CFL), this is an extremely
naive approach that we should revisit when we can specialize for the
new pass manager.
- BasicAA has been restructured to reflect that it is much more
fundamentally a function analysis because it uses dominator trees and
loop info that need to be constructed for each function.
All of the references to getting alias analysis results have been
updated to use the new aggregation interface. All the preservation and
other pass management code has been updated accordingly.
The way the FunctionAAResultsWrapperPass works is to detect the
available alias analyses when run, and add them to the results object.
This means that we should be able to continue to respect when various
passes are added to the pipeline, for example adding CFL or adding TBAA
passes should just cause their results to be available and to get folded
into this. The exception to this rule is BasicAA which really needs to
be a function pass due to using dominator trees and loop info. As
a consequence, the FunctionAAResultsWrapperPass directly depends on
BasicAA and always includes it in the aggregation.
This has significant implications for preserving analyses. Generally,
most passes shouldn't bother preserving FunctionAAResultsWrapperPass
because rebuilding the results just updates the set of known AA passes.
The exception to this rule are LoopPass instances which need to preserve
all the function analyses that the loop pass manager will end up
needing. This means preserving both BasicAAWrapperPass and the
aggregating FunctionAAResultsWrapperPass.
Now, when preserving an alias analysis, you do so by directly preserving
that analysis. This is only necessary for non-immutable-pass-provided
alias analyses though, and there are only three of interest: BasicAA,
GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is
preserved when needed because it (like DominatorTree and LoopInfo) is
marked as a CFG-only pass. I've expanded GlobalsAA into the preserved
set everywhere we previously were preserving all of AliasAnalysis, and
I've added SCEVAA in the intersection of that with where we preserve
SCEV itself.
One significant challenge to all of this is that the CGSCC passes were
actually using the alias analysis implementations by taking advantage of
a pretty amazing set of loop holes in the old pass manager's analysis
management code which allowed analysis groups to slide through in many
cases. Moving away from analysis groups makes this problem much more
obvious. To fix it, I've leveraged the flexibility the design of the new
PM components provides to just directly construct the relevant alias
analyses for the relevant functions in the IPO passes that need them.
This is a bit hacky, but should go away with the new pass manager, and
is already in many ways cleaner than the prior state.
Another significant challenge is that various facilities of the old
alias analysis infrastructure just don't fit any more. The most
significant of these is the alias analysis 'counter' pass. That pass
relied on the ability to snoop on AA queries at different points in the
analysis group chain. Instead, I'm planning to build printing
functionality directly into the aggregation layer. I've not included
that in this patch merely to keep it smaller.
Note that all of this needs a nearly complete rewrite of the AA
documentation. I'm planning to do that, but I'd like to make sure the
new design settles, and to flesh out a bit more of what it looks like in
the new pass manager first.
Differential Revision: http://reviews.llvm.org/D12080
llvm-svn: 247167
2015-09-10 01:55:00 +08:00
|
|
|
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
2012-03-08 09:41:12 +08:00
|
|
|
|
|
|
|
LIS = &getAnalysis<LiveIntervals>();
|
|
|
|
|
2013-03-08 13:40:34 +08:00
|
|
|
if (VerifyScheduling) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(LIS->dump());
|
2013-03-08 13:40:34 +08:00
|
|
|
MF->verify(this, "Before machine scheduling.");
|
|
|
|
}
|
2012-04-25 04:36:19 +08:00
|
|
|
RegClassInfo->runOnMachineFunction(*MF);
|
2012-04-25 01:56:43 +08:00
|
|
|
|
2013-09-20 13:14:41 +08:00
|
|
|
// Instantiate the selected scheduler for this target, function, and
|
|
|
|
// optimization level.
|
2014-03-06 13:51:42 +08:00
|
|
|
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
|
2015-11-03 09:53:29 +08:00
|
|
|
scheduleRegions(*Scheduler, false);
|
2013-12-29 05:56:47 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(LIS->dump());
|
2013-12-29 05:56:47 +08:00
|
|
|
if (VerifyScheduling)
|
|
|
|
MF->verify(this, "After machine scheduling.");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:51 +08:00
|
|
|
/// Top-level PostRA MachineScheduler pass driver.
///
/// Returns false when the function is skipped or postRA scheduling is
/// disabled, and true once the regions have been handed to the scheduler.
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  // An explicit command line setting takes precedence over the subtarget's
  // preference.
  const bool OptionGiven = EnablePostRAMachineSched.getNumOccurrences() != 0;
  if (OptionGiven && !EnablePostRAMachineSched)
    return false;
  if (!OptionGiven && !mf.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level. The unique_ptr releases it when the pass returns.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, /*FixKillFlags=*/true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}
|
|
|
|
|
2013-12-29 05:56:57 +08:00
|
|
|
/// Return true of the given instruction should not be included in a scheduling
|
|
|
|
/// region.
|
|
|
|
///
|
|
|
|
/// MachineScheduler does not currently support scheduling across calls. To
|
|
|
|
/// handle calls, the DAG builder needs to be modified to create register
|
|
|
|
/// anti/output dependencies on the registers clobbered by the call's regmask
|
|
|
|
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
|
|
|
|
/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
|
|
|
|
/// the boundary, but there would be no benefit to postRA scheduling across
|
|
|
|
/// calls this late anyway.
|
|
|
|
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
|
|
|
|
MachineBasicBlock *MBB,
|
|
|
|
MachineFunction *MF,
|
2015-11-03 09:53:29 +08:00
|
|
|
const TargetInstrInfo *TII) {
|
2016-06-30 08:01:54 +08:00
|
|
|
return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
|
|
|
|
2017-08-17 16:33:44 +08:00
|
|
|
/// A region of an MBB for scheduling.
|
2017-09-13 22:07:47 +08:00
|
|
|
namespace {
|
2017-08-17 16:33:44 +08:00
|
|
|
struct SchedRegion {
|
|
|
|
/// RegionBegin is the first instruction in the scheduling region, and
|
|
|
|
/// RegionEnd is either MBB->end() or the scheduling boundary after the
|
|
|
|
/// last instruction in the scheduling region. These iterators cannot refer
|
|
|
|
/// to instructions outside of the identified scheduling region because
|
|
|
|
/// those may be reordered before scheduling this region.
|
|
|
|
MachineBasicBlock::iterator RegionBegin;
|
|
|
|
MachineBasicBlock::iterator RegionEnd;
|
|
|
|
unsigned NumRegionInstrs;
|
2017-09-12 07:00:48 +08:00
|
|
|
|
2017-08-17 16:33:44 +08:00
|
|
|
SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
|
|
|
|
unsigned N) :
|
|
|
|
RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
|
|
|
|
};
|
2017-09-13 22:07:47 +08:00
|
|
|
} // end anonymous namespace
|
2017-08-17 16:33:44 +08:00
|
|
|
|
2017-09-12 07:00:48 +08:00
|
|
|
// Collection of the scheduling regions found within a single basic block.
using MBBRegionsVector = SmallVector<SchedRegion, 16>;
|
|
|
|
|
2017-08-17 16:33:44 +08:00
|
|
|
static void
|
|
|
|
getSchedRegions(MachineBasicBlock *MBB,
|
|
|
|
MBBRegionsVector &Regions,
|
|
|
|
bool RegionsTopDown) {
|
|
|
|
MachineFunction *MF = MBB->getParent();
|
|
|
|
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
|
|
|
|
|
|
|
MachineBasicBlock::iterator I = nullptr;
|
|
|
|
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
|
|
|
|
RegionEnd != MBB->begin(); RegionEnd = I) {
|
|
|
|
|
|
|
|
// Avoid decrementing RegionEnd for blocks with no terminator.
|
|
|
|
if (RegionEnd != MBB->end() ||
|
|
|
|
isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
|
|
|
|
--RegionEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The next region starts above the previous region. Look backward in the
|
|
|
|
// instruction stream until we find the nearest boundary.
|
|
|
|
unsigned NumRegionInstrs = 0;
|
|
|
|
I = RegionEnd;
|
|
|
|
for (;I != MBB->begin(); --I) {
|
|
|
|
MachineInstr &MI = *std::prev(I);
|
|
|
|
if (isSchedBoundary(&MI, &*MBB, MF, TII))
|
|
|
|
break;
|
2019-03-26 01:15:44 +08:00
|
|
|
if (!MI.isDebugInstr()) {
|
2017-08-17 16:33:44 +08:00
|
|
|
// MBB::size() uses instr_iterator to count. Here we need a bundle to
|
|
|
|
// count as a single instruction.
|
|
|
|
++NumRegionInstrs;
|
2019-03-26 01:15:44 +08:00
|
|
|
}
|
2017-08-17 16:33:44 +08:00
|
|
|
}
|
|
|
|
|
2019-03-26 01:15:44 +08:00
|
|
|
// It's possible we found a scheduling region that only has debug
|
|
|
|
// instructions. Don't bother scheduling these.
|
|
|
|
if (NumRegionInstrs != 0)
|
|
|
|
Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
|
2017-08-17 16:33:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (RegionsTopDown)
|
|
|
|
std::reverse(Regions.begin(), Regions.end());
|
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
/// Main driver for both MachineScheduler and PostMachineScheduler.
|
2015-11-03 09:53:29 +08:00
|
|
|
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
|
|
|
|
bool FixKillFlags) {
|
2012-03-08 09:41:12 +08:00
|
|
|
// Visit all machine basic blocks.
|
2012-04-25 01:56:43 +08:00
|
|
|
//
|
|
|
|
// TODO: Visit blocks in global postorder or postorder within the bottom-up
|
|
|
|
// loop tree. Then we can optionally compute global RegPressure.
|
2012-03-08 09:41:12 +08:00
|
|
|
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
|
|
|
|
MBB != MBBEnd; ++MBB) {
|
|
|
|
|
2015-10-10 03:40:45 +08:00
|
|
|
Scheduler.startBlock(&*MBB);
|
2012-03-09 16:02:51 +08:00
|
|
|
|
2013-12-29 05:57:02 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
|
|
|
|
continue;
|
|
|
|
if (SchedOnlyBlock.getNumOccurrences()
|
|
|
|
&& (int)SchedOnlyBlock != MBB->getNumber())
|
|
|
|
continue;
|
|
|
|
#endif
|
|
|
|
|
2017-08-17 16:33:44 +08:00
|
|
|
// Break the block into scheduling regions [I, RegionEnd). RegionEnd
|
|
|
|
// points to the scheduling boundary at the bottom of the region. The DAG
|
|
|
|
// does not include RegionEnd, but the region does (i.e. the next
|
|
|
|
// RegionEnd is above the previous RegionBegin). If the current block has
|
|
|
|
// no terminator then RegionEnd == MBB->end() for the bottom region.
|
|
|
|
//
|
|
|
|
// All the regions of MBB are first found and stored in MBBRegions, which
|
|
|
|
// will be processed (MBB) top-down if initialized with true.
|
2012-03-10 06:34:56 +08:00
|
|
|
//
|
|
|
|
// The Scheduler may insert instructions during either schedule() or
|
|
|
|
// exitRegion(), even for empty regions. So the local iterators 'I' and
|
2017-08-17 16:33:44 +08:00
|
|
|
// 'RegionEnd' are invalid across these calls. Instructions must not be
|
|
|
|
// added to other regions than the current one without updating MBBRegions.
|
|
|
|
|
|
|
|
MBBRegionsVector MBBRegions;
|
|
|
|
getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
|
|
|
|
for (MBBRegionsVector::iterator R = MBBRegions.begin();
|
|
|
|
R != MBBRegions.end(); ++R) {
|
|
|
|
MachineBasicBlock::iterator I = R->RegionBegin;
|
|
|
|
MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
|
|
|
|
unsigned NumRegionInstrs = R->NumRegionInstrs;
|
2012-03-09 16:02:51 +08:00
|
|
|
|
2012-03-08 09:41:12 +08:00
|
|
|
// Notify the scheduler of the region, even if we may skip scheduling
|
|
|
|
// it. Perhaps it still needs to be bundled.
|
2015-10-10 03:40:45 +08:00
|
|
|
Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
|
2012-03-08 09:41:12 +08:00
|
|
|
|
|
|
|
// Skip empty scheduling regions (0 or 1 schedulable instructions).
|
2014-03-02 20:27:27 +08:00
|
|
|
if (I == RegionEnd || I == std::prev(RegionEnd)) {
|
2012-03-08 09:41:12 +08:00
|
|
|
// Close the current region. Bundle the terminator if needed.
|
2012-03-10 06:34:56 +08:00
|
|
|
// This invalidates 'RegionEnd' and 'I'.
|
2013-12-29 05:56:47 +08:00
|
|
|
Scheduler.exitRegion();
|
2012-03-08 09:41:12 +08:00
|
|
|
continue;
|
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
|
|
|
|
LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
|
|
|
|
<< " " << MBB->getName() << "\n From: " << *I
|
|
|
|
<< " To: ";
|
|
|
|
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
|
|
|
|
else dbgs() << "End";
|
|
|
|
dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
|
2014-08-08 05:49:44 +08:00
|
|
|
if (DumpCriticalPathLength) {
|
|
|
|
errs() << MF->getName();
|
2017-12-05 01:18:51 +08:00
|
|
|
errs() << ":%bb. " << MBB->getNumber();
|
2014-08-08 05:49:44 +08:00
|
|
|
errs() << " " << MBB->getName() << " \n";
|
|
|
|
}
|
2012-03-08 09:41:12 +08:00
|
|
|
|
2012-03-09 11:46:42 +08:00
|
|
|
// Schedule a region: possibly reorder instructions.
|
2017-08-17 16:33:44 +08:00
|
|
|
// This invalidates the original region iterators.
|
2013-12-29 05:56:47 +08:00
|
|
|
Scheduler.schedule();
|
2012-03-09 11:46:42 +08:00
|
|
|
|
|
|
|
// Close the current region.
|
2013-12-29 05:56:47 +08:00
|
|
|
Scheduler.exitRegion();
|
2012-03-08 09:41:12 +08:00
|
|
|
}
|
2013-12-29 05:56:47 +08:00
|
|
|
Scheduler.finishBlock();
|
2015-11-03 09:53:29 +08:00
|
|
|
// FIXME: Ideally, no further passes should rely on kill flags. However,
|
|
|
|
// thumb2 size reduction is currently an exception, so the PostMIScheduler
|
|
|
|
// needs to do this.
|
|
|
|
if (FixKillFlags)
|
2017-05-27 10:50:50 +08:00
|
|
|
Scheduler.fixupKills(*MBB);
|
2012-03-08 09:41:12 +08:00
|
|
|
}
|
2013-12-29 05:56:47 +08:00
|
|
|
Scheduler.finalizeSchedule();
|
2012-03-08 09:41:12 +08:00
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
/// Pass printing hook. Intentionally a no-op: nothing is written to \p O and
/// \p m is ignored.
void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}
|
|
|
|
|
2017-10-15 22:32:27 +08:00
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
2017-06-22 06:19:17 +08:00
|
|
|
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
|
2015-09-19 02:52:20 +08:00
|
|
|
dbgs() << "Queue " << Name << ": ";
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const SUnit *SU : Queue)
|
|
|
|
dbgs() << SU->NodeNum << " ";
|
2012-09-11 08:39:15 +08:00
|
|
|
dbgs() << "\n";
|
|
|
|
}
|
2017-01-28 10:02:38 +08:00
|
|
|
#endif
|
2012-03-14 12:00:41 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//
|
2012-01-14 10:17:06 +08:00
|
|
|
|
2014-04-22 04:32:32 +08:00
|
|
|
// Provide a vtable anchor.
|
2017-02-23 06:32:51 +08:00
|
|
|
ScheduleDAGMI::~ScheduleDAGMI() = default;
|
2013-01-25 12:01:04 +08:00
|
|
|
|
2012-01-17 14:55:07 +08:00
|
|
|
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
|
|
|
|
/// NumPredsLeft reaches zero, release the successor node.
|
2012-05-25 06:11:09 +08:00
|
|
|
///
|
|
|
|
/// FIXME: Adjust SuccSU height based on MinLatency.
|
2012-03-14 12:00:41 +08:00
|
|
|
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
|
2012-01-17 14:55:07 +08:00
|
|
|
SUnit *SuccSU = SuccEdge->getSUnit();
|
|
|
|
|
2012-11-13 03:28:57 +08:00
|
|
|
if (SuccEdge->isWeak()) {
|
|
|
|
--SuccSU->WeakPredsLeft;
|
2012-11-13 03:40:10 +08:00
|
|
|
if (SuccEdge->isCluster())
|
|
|
|
NextClusterSucc = SuccSU;
|
2012-11-13 03:28:57 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-01-17 14:55:07 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
if (SuccSU->NumPredsLeft == 0) {
|
|
|
|
dbgs() << "*** Scheduling failed! ***\n";
|
2018-09-19 08:23:35 +08:00
|
|
|
dumpNode(*SuccSU);
|
2012-01-17 14:55:07 +08:00
|
|
|
dbgs() << " has been released too many times!\n";
|
2014-04-14 08:51:57 +08:00
|
|
|
llvm_unreachable(nullptr);
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
|
|
|
#endif
|
2014-06-07 09:48:43 +08:00
|
|
|
// SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
|
|
|
|
// CurrCycle may have advanced since then.
|
|
|
|
if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
|
|
|
|
SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
|
|
|
|
|
2012-01-17 14:55:07 +08:00
|
|
|
--SuccSU->NumPredsLeft;
|
|
|
|
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
|
2012-03-14 12:00:41 +08:00
|
|
|
SchedImpl->releaseTopNode(SuccSU);
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
|
2012-03-14 12:00:41 +08:00
|
|
|
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SDep &Succ : SU->Succs)
|
|
|
|
releaseSucc(SU, &Succ);
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
|
|
|
|
/// NumSuccsLeft reaches zero, release the predecessor node.
|
2012-05-25 06:11:09 +08:00
|
|
|
///
|
|
|
|
/// FIXME: Adjust PredSU height based on MinLatency.
|
2012-03-14 12:00:41 +08:00
|
|
|
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
|
|
|
|
SUnit *PredSU = PredEdge->getSUnit();
|
|
|
|
|
2012-11-13 03:28:57 +08:00
|
|
|
if (PredEdge->isWeak()) {
|
|
|
|
--PredSU->WeakSuccsLeft;
|
2012-11-13 03:40:10 +08:00
|
|
|
if (PredEdge->isCluster())
|
|
|
|
NextClusterPred = PredSU;
|
2012-11-13 03:28:57 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-03-14 12:00:41 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
if (PredSU->NumSuccsLeft == 0) {
|
|
|
|
dbgs() << "*** Scheduling failed! ***\n";
|
2018-09-19 08:23:35 +08:00
|
|
|
dumpNode(*PredSU);
|
2012-03-14 12:00:41 +08:00
|
|
|
dbgs() << " has been released too many times!\n";
|
2014-04-14 08:51:57 +08:00
|
|
|
llvm_unreachable(nullptr);
|
2012-03-14 12:00:41 +08:00
|
|
|
}
|
|
|
|
#endif
|
2014-06-07 09:48:43 +08:00
|
|
|
// SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
|
|
|
|
// CurrCycle may have advanced since then.
|
|
|
|
if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
|
|
|
|
PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
|
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
--PredSU->NumSuccsLeft;
|
2020-09-21 19:18:39 +08:00
|
|
|
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
|
2012-03-14 12:00:41 +08:00
|
|
|
SchedImpl->releaseBottomNode(PredSU);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// releasePredecessors - Call releasePred on each of SU's predecessors.
|
|
|
|
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SDep &Pred : SU->Preds)
|
|
|
|
releasePred(SU, &Pred);
|
2012-03-14 12:00:41 +08:00
|
|
|
}
|
|
|
|
|
2017-08-17 16:33:44 +08:00
|
|
|
/// Begin processing \p bb: run the base-class block setup first, then tell the
/// scheduling strategy that the block has been entered.
void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}
|
|
|
|
|
|
|
|
/// Finish processing the current block: the scheduling strategy is notified
/// first, then the base-class block teardown runs (the reverse of the order
/// used by startBlock).
void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
|
|
|
|
/// crossing a scheduling boundary. [begin, end) includes all instructions in
|
|
|
|
/// the region, including the boundary itself and single-instruction regions
|
|
|
|
/// that don't get scheduled.
|
|
|
|
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
|
|
|
|
MachineBasicBlock::iterator begin,
|
|
|
|
MachineBasicBlock::iterator end,
|
|
|
|
unsigned regioninstrs)
|
|
|
|
{
|
|
|
|
ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
|
|
|
|
|
|
|
|
SchedImpl->initPolicy(begin, end, regioninstrs);
|
|
|
|
}
|
|
|
|
|
2013-04-13 14:07:40 +08:00
|
|
|
/// This is normally called from the main scheduler loop but may also be invoked
|
|
|
|
/// by the scheduling strategy to perform additional code motion.
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMI::moveInstruction(
|
|
|
|
MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
|
2012-05-18 02:35:03 +08:00
|
|
|
// Advance RegionBegin if the first instruction moves down.
|
2012-03-21 12:12:10 +08:00
|
|
|
if (&*RegionBegin == MI)
|
2012-05-18 02:35:03 +08:00
|
|
|
++RegionBegin;
|
|
|
|
|
|
|
|
// Update the instruction stream.
|
2012-03-14 12:00:41 +08:00
|
|
|
BB->splice(InsertPos, BB, MI);
|
2012-05-18 02:35:03 +08:00
|
|
|
|
|
|
|
// Update LiveIntervals
|
2013-12-29 05:56:47 +08:00
|
|
|
if (LIS)
|
2016-02-28 04:14:29 +08:00
|
|
|
LIS->handleMove(*MI, /*UpdateFlags=*/true);
|
2012-05-18 02:35:03 +08:00
|
|
|
|
|
|
|
// Recede RegionBegin if an instruction moves above the first.
|
2012-03-14 12:00:41 +08:00
|
|
|
if (RegionBegin == InsertPos)
|
|
|
|
RegionBegin = MI;
|
|
|
|
}
|
|
|
|
|
2012-03-21 12:12:07 +08:00
|
|
|
bool ScheduleDAGMI::checkSchedLimit() {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
|
|
|
|
CurrentTop = CurrentBottom;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
++NumInstrsScheduled;
|
|
|
|
#endif
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
/// Per-region scheduling driver, called back from
|
|
|
|
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
|
|
|
|
/// does not consider liveness or register pressure. It is useful for PostRA
|
|
|
|
/// scheduling and potentially other custom schedulers.
|
|
|
|
void ScheduleDAGMI::schedule() {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
|
|
|
|
LLVM_DEBUG(SchedImpl->dumpPolicy());
|
2015-09-19 02:52:20 +08:00
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
// Build the DAG.
|
|
|
|
buildSchedGraph(AA);
|
|
|
|
|
|
|
|
postprocessDAG();
|
|
|
|
|
|
|
|
SmallVector<SUnit*, 8> TopRoots, BotRoots;
|
|
|
|
findRootsAndBiasEdges(TopRoots, BotRoots);
|
|
|
|
|
2018-09-19 08:23:35 +08:00
|
|
|
LLVM_DEBUG(dump());
|
2018-09-20 04:50:49 +08:00
|
|
|
if (PrintDAGs) dump();
|
2013-12-29 05:56:47 +08:00
|
|
|
if (ViewMISchedDAGs) viewGraph();
|
|
|
|
|
2018-03-06 00:31:49 +08:00
|
|
|
// Initialize the strategy before modifying the DAG.
|
|
|
|
// This may initialize a DFSResult to be used for queue priority.
|
|
|
|
SchedImpl->initialize(this);
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
// Initialize ready queues now that the DAG and priority data are finalized.
|
|
|
|
initQueues(TopRoots, BotRoots);
|
|
|
|
|
|
|
|
bool IsTopNode = false;
|
2015-09-19 02:52:20 +08:00
|
|
|
while (true) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
|
2015-09-19 02:52:20 +08:00
|
|
|
SUnit *SU = SchedImpl->pickNode(IsTopNode);
|
|
|
|
if (!SU) break;
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
assert(!SU->isScheduled && "Node already scheduled");
|
|
|
|
if (!checkSchedLimit())
|
|
|
|
break;
|
|
|
|
|
|
|
|
MachineInstr *MI = SU->getInstr();
|
|
|
|
if (IsTopNode) {
|
|
|
|
assert(SU->isTopReady() && "node still has unscheduled dependencies");
|
|
|
|
if (&*CurrentTop == MI)
|
|
|
|
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
|
|
|
|
else
|
|
|
|
moveInstruction(MI, CurrentTop);
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2013-12-29 05:56:47 +08:00
|
|
|
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
|
|
|
|
MachineBasicBlock::iterator priorII =
|
|
|
|
priorNonDebug(CurrentBottom, CurrentTop);
|
|
|
|
if (&*priorII == MI)
|
|
|
|
CurrentBottom = priorII;
|
|
|
|
else {
|
|
|
|
if (&*CurrentTop == MI)
|
|
|
|
CurrentTop = nextIfDebug(++CurrentTop, priorII);
|
|
|
|
moveInstruction(MI, CurrentBottom);
|
|
|
|
CurrentBottom = MI;
|
|
|
|
}
|
|
|
|
}
|
2014-06-07 09:48:43 +08:00
|
|
|
// Notify the scheduling strategy before updating the DAG.
|
2014-06-13 06:36:28 +08:00
|
|
|
// This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
|
2014-06-07 09:48:43 +08:00
|
|
|
// runs, it can then use the accurate ReadyCycle time to determine whether
|
|
|
|
// newly released nodes can move to the readyQ.
|
2013-12-29 05:56:47 +08:00
|
|
|
SchedImpl->schedNode(SU, IsTopNode);
|
2014-06-07 09:48:43 +08:00
|
|
|
|
|
|
|
updateQueues(SU, IsTopNode);
|
2013-12-29 05:56:47 +08:00
|
|
|
}
|
|
|
|
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
|
|
|
|
|
|
|
|
placeDebugValues();
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
2017-12-05 01:18:51 +08:00
|
|
|
dbgs() << "*** Final schedule for "
|
|
|
|
<< printMBBReference(*begin()->getParent()) << " ***\n";
|
|
|
|
dumpSchedule();
|
|
|
|
dbgs() << '\n';
|
|
|
|
});
|
2013-12-29 05:56:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Apply each ScheduleDAGMutation step in order.
|
|
|
|
void ScheduleDAGMI::postprocessDAG() {
|
2017-06-21 17:10:10 +08:00
|
|
|
for (auto &m : Mutations)
|
|
|
|
m->apply(this);
|
2013-12-29 05:56:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void ScheduleDAGMI::
|
|
|
|
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
|
|
|
|
SmallVectorImpl<SUnit*> &BotRoots) {
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit &SU : SUnits) {
|
|
|
|
assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
|
2013-12-29 05:56:47 +08:00
|
|
|
|
|
|
|
// Order predecessors so DFSResult follows the critical path.
|
2017-06-21 17:10:10 +08:00
|
|
|
SU.biasCriticalPath();
|
2013-12-29 05:56:47 +08:00
|
|
|
|
|
|
|
// A SUnit is ready to top schedule if it has no predecessors.
|
2017-06-21 17:10:10 +08:00
|
|
|
if (!SU.NumPredsLeft)
|
|
|
|
TopRoots.push_back(&SU);
|
2013-12-29 05:56:47 +08:00
|
|
|
// A SUnit is ready to bottom schedule if it has no successors.
|
2017-06-21 17:10:10 +08:00
|
|
|
if (!SU.NumSuccsLeft)
|
|
|
|
BotRoots.push_back(&SU);
|
2013-12-29 05:56:47 +08:00
|
|
|
}
|
|
|
|
ExitSU.biasCriticalPath();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Identify DAG roots and setup scheduler queues.
|
|
|
|
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
|
|
|
|
ArrayRef<SUnit*> BotRoots) {
|
2014-04-14 08:51:57 +08:00
|
|
|
NextClusterSucc = nullptr;
|
|
|
|
NextClusterPred = nullptr;
|
2013-12-29 05:56:47 +08:00
|
|
|
|
2020-09-21 19:18:39 +08:00
|
|
|
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
|
2013-12-29 05:56:47 +08:00
|
|
|
//
|
|
|
|
// Nodes with unreleased weak edges can still be roots.
|
|
|
|
// Release top roots in forward order.
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit *SU : TopRoots)
|
|
|
|
SchedImpl->releaseTopNode(SU);
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
// Release bottom roots in reverse order so the higher priority nodes appear
|
|
|
|
// first. This is more natural and slightly more efficient.
|
|
|
|
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
|
|
|
|
I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
|
|
|
|
SchedImpl->releaseBottomNode(*I);
|
|
|
|
}
|
|
|
|
|
2020-09-21 19:18:39 +08:00
|
|
|
releaseSuccessors(&EntrySU);
|
2013-12-29 05:56:47 +08:00
|
|
|
releasePredecessors(&ExitSU);
|
|
|
|
|
|
|
|
SchedImpl->registerRoots();
|
|
|
|
|
|
|
|
// Advance past initial DebugValues.
|
|
|
|
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
|
|
|
|
CurrentBottom = RegionEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Update scheduler queues after scheduling an instruction.
|
|
|
|
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
|
|
|
|
// Release dependent instructions for scheduling.
|
|
|
|
if (IsTopNode)
|
|
|
|
releaseSuccessors(SU);
|
|
|
|
else
|
|
|
|
releasePredecessors(SU);
|
|
|
|
|
|
|
|
SU->isScheduled = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
|
|
|
|
void ScheduleDAGMI::placeDebugValues() {
|
|
|
|
// If first instruction was a DBG_VALUE then put it back.
|
|
|
|
if (FirstDbgValue) {
|
|
|
|
BB->splice(RegionBegin, BB, FirstDbgValue);
|
|
|
|
RegionBegin = FirstDbgValue;
|
|
|
|
}
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
|
2013-12-29 05:56:47 +08:00
|
|
|
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
|
2014-03-02 20:27:27 +08:00
|
|
|
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
|
2013-12-29 05:56:47 +08:00
|
|
|
MachineInstr *DbgValue = P.first;
|
|
|
|
MachineBasicBlock::iterator OrigPrevMI = P.second;
|
|
|
|
if (&*RegionBegin == DbgValue)
|
|
|
|
++RegionBegin;
|
|
|
|
BB->splice(++OrigPrevMI, BB, DbgValue);
|
2014-03-02 20:27:27 +08:00
|
|
|
if (OrigPrevMI == std::prev(RegionEnd))
|
2013-12-29 05:56:47 +08:00
|
|
|
RegionEnd = DbgValue;
|
|
|
|
}
|
|
|
|
DbgValues.clear();
|
2014-04-14 08:51:57 +08:00
|
|
|
FirstDbgValue = nullptr;
|
2013-12-29 05:56:47 +08:00
|
|
|
}
|
|
|
|
|
2017-10-15 22:32:27 +08:00
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
2017-01-28 10:02:38 +08:00
|
|
|
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
|
2013-12-29 05:56:47 +08:00
|
|
|
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
|
|
|
|
if (SUnit *SU = getSUnit(&(*MI)))
|
2018-09-19 08:23:35 +08:00
|
|
|
dumpNode(*SU);
|
2013-12-29 05:56:47 +08:00
|
|
|
else
|
|
|
|
dbgs() << "Missing SUnit\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
|
|
|
|
// preservation.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// DFSResult is heap-allocated by computeDFSResult() and is freed here;
// deleting a null pointer is a no-op when it was never created.
ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}
|
|
|
|
|
2016-11-12 06:37:31 +08:00
|
|
|
/// Record in VRegUses each virtual register that the instruction of \p SU
/// reads, with at most one entry per (register, SUnit) pair.
///
/// When TrackLaneMasks is set, only use operands are considered, and a
/// register that the same instruction also defines with a non-dead def is
/// skipped.
void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    // Only register operands that read their register are of interest.
    if (!MO.isReg() || !MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    Register Reg = MO.getReg();
    if (!Register::isVirtualRegister(Reg))
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool IsRedefined =
          llvm::any_of(MI.operands(), [&](const MachineOperand &Other) {
            return Other.isReg() && Other.isDef() && Other.getReg() == Reg &&
                   !Other.isDead();
          });
      if (IsRedefined)
        continue;
    }

    // Record this local VReg use unless this SUnit is already listed for it.
    bool AlreadyRecorded = false;
    for (VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg),
                                      UE = VRegUses.end();
         UI != UE; ++UI) {
      if (UI->SU == &SU) {
        AlreadyRecorded = true;
        break;
      }
    }
    if (!AlreadyRecorded)
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}
|
|
|
|
|
2012-04-25 01:56:43 +08:00
|
|
|
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
|
|
|
|
/// crossing a scheduling boundary. [begin, end) includes all instructions in
|
|
|
|
/// the region, including the boundary itself and single-instruction regions
|
|
|
|
/// that don't get scheduled.
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
|
2012-04-25 01:56:43 +08:00
|
|
|
MachineBasicBlock::iterator begin,
|
|
|
|
MachineBasicBlock::iterator end,
|
2013-08-24 01:48:33 +08:00
|
|
|
unsigned regioninstrs)
|
2012-04-25 01:56:43 +08:00
|
|
|
{
|
2013-12-29 05:56:47 +08:00
|
|
|
// ScheduleDAGMI initializes SchedImpl's per-region policy.
|
|
|
|
ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
|
2012-05-11 05:06:10 +08:00
|
|
|
|
|
|
|
// For convenience remember the end of the liveness region.
|
2014-03-02 20:27:27 +08:00
|
|
|
LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
|
2013-09-07 01:32:34 +08:00
|
|
|
|
2013-09-07 01:32:47 +08:00
|
|
|
SUPressureDiffs.clear();
|
|
|
|
|
2013-09-07 01:32:34 +08:00
|
|
|
ShouldTrackPressure = SchedImpl->shouldTrackPressure();
|
2016-01-20 08:23:32 +08:00
|
|
|
ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
|
|
|
|
|
2016-06-01 06:38:06 +08:00
|
|
|
assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
|
|
|
|
"ShouldTrackLaneMasks requires ShouldTrackPressure");
|
2012-05-11 05:06:10 +08:00
|
|
|
}
|
|
|
|
|
2019-09-14 11:27:38 +08:00
|
|
|
// Setup the register pressure trackers for the top scheduled and bottom
|
2012-05-11 05:06:10 +08:00
|
|
|
// scheduled regions.
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::initRegPressure() {
|
2016-11-12 06:37:31 +08:00
|
|
|
VRegUses.clear();
|
|
|
|
VRegUses.setUniverse(MRI.getNumVirtRegs());
|
|
|
|
for (SUnit &SU : SUnits)
|
|
|
|
collectVRegUses(SU);
|
|
|
|
|
2016-01-20 08:23:32 +08:00
|
|
|
TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
|
|
|
|
ShouldTrackLaneMasks, false);
|
|
|
|
BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
|
|
|
|
ShouldTrackLaneMasks, false);
|
2012-05-11 05:06:10 +08:00
|
|
|
|
|
|
|
// Close the RPTracker to finalize live ins.
|
|
|
|
RPTracker.closeRegion();
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(RPTracker.dump());
|
2012-05-25 06:11:14 +08:00
|
|
|
|
2012-05-11 05:06:10 +08:00
|
|
|
// Initialize the live ins and live outs.
|
2015-09-18 05:12:24 +08:00
|
|
|
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
|
|
|
|
BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
|
2012-05-11 05:06:10 +08:00
|
|
|
|
|
|
|
// Close one end of the tracker so we can call
|
|
|
|
// getMaxUpward/DownwardPressureDelta before advancing across any
|
|
|
|
// instructions. This converts currently live regs into live ins/outs.
|
|
|
|
TopRPTracker.closeTop();
|
|
|
|
BotRPTracker.closeBottom();
|
|
|
|
|
2013-07-31 03:59:12 +08:00
|
|
|
BotRPTracker.initLiveThru(RPTracker);
|
|
|
|
if (!BotRPTracker.getLiveThru().empty()) {
|
|
|
|
TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Live Thru: ";
|
|
|
|
dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
|
2013-07-31 03:59:12 +08:00
|
|
|
};
|
|
|
|
|
2013-08-30 12:36:57 +08:00
|
|
|
// For each live out vreg reduce the pressure change associated with other
|
|
|
|
// uses of the same vreg below the live-out reaching def.
|
2015-09-18 05:12:24 +08:00
|
|
|
updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
|
2013-08-30 12:36:57 +08:00
|
|
|
|
2012-05-11 05:06:10 +08:00
|
|
|
// Account for liveness generated by the region boundary.
|
2013-08-30 12:36:57 +08:00
|
|
|
if (LiveRegionEnd != RegionEnd) {
|
2016-01-20 08:23:26 +08:00
|
|
|
SmallVector<RegisterMaskPair, 8> LiveUses;
|
2013-08-30 12:36:57 +08:00
|
|
|
BotRPTracker.recede(&LiveUses);
|
|
|
|
updatePressureDiffs(LiveUses);
|
|
|
|
}
|
2012-05-11 05:06:10 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Top Pressure:\n";
|
|
|
|
dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
|
|
|
|
dbgs() << "Bottom Pressure:\n";
|
|
|
|
dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
|
2015-11-14 06:30:31 +08:00
|
|
|
|
2017-12-15 11:56:57 +08:00
|
|
|
assert((BotRPTracker.getPos() == RegionEnd ||
|
2018-05-09 10:42:00 +08:00
|
|
|
(RegionEnd->isDebugInstr() &&
|
2017-12-15 11:56:57 +08:00
|
|
|
BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
|
|
|
|
"Can't find the region bottom");
|
2012-05-18 02:35:10 +08:00
|
|
|
|
|
|
|
// Cache the list of excess pressure sets in this region. This will also track
|
|
|
|
// the max pressure in the scheduled code for these sets.
|
|
|
|
RegionCriticalPSets.clear();
|
2013-01-26 05:44:27 +08:00
|
|
|
const std::vector<unsigned> &RegionPressure =
|
|
|
|
RPTracker.getPressure().MaxSetPressure;
|
2012-05-18 02:35:10 +08:00
|
|
|
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
|
2013-06-22 02:32:58 +08:00
|
|
|
unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
|
2013-06-22 02:33:01 +08:00
|
|
|
if (RegionPressure[i] > Limit) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
|
|
|
|
<< " Actual " << RegionPressure[i] << "\n");
|
2013-08-30 11:49:48 +08:00
|
|
|
RegionCriticalPSets.push_back(PressureChange(i));
|
2013-06-22 02:33:01 +08:00
|
|
|
}
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Excess PSets: ";
|
|
|
|
for (const PressureChange &RCPS
|
|
|
|
: RegionCriticalPSets) dbgs()
|
|
|
|
<< TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
|
|
|
|
dbgs() << "\n");
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::
|
2013-09-07 01:32:47 +08:00
|
|
|
updateScheduledPressure(const SUnit *SU,
|
|
|
|
const std::vector<unsigned> &NewMaxPressure) {
|
|
|
|
const PressureDiff &PDiff = getPressureDiff(SU);
|
|
|
|
unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const PressureChange &PC : PDiff) {
|
|
|
|
if (!PC.isValid())
|
2013-09-07 01:32:47 +08:00
|
|
|
break;
|
2017-06-21 17:10:10 +08:00
|
|
|
unsigned ID = PC.getPSet();
|
2013-09-07 01:32:47 +08:00
|
|
|
while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
|
|
|
|
++CritIdx;
|
|
|
|
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
|
|
|
|
if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
|
2017-02-23 20:00:34 +08:00
|
|
|
&& NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
|
2013-09-07 01:32:47 +08:00
|
|
|
RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
|
|
|
|
}
|
|
|
|
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
|
|
|
|
if (NewMaxPressure[ID] >= Limit - 2) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
|
|
|
|
<< NewMaxPressure[ID]
|
|
|
|
<< ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
|
|
|
|
<< Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
|
|
|
|
<< " livethru)\n");
|
2013-09-07 01:32:47 +08:00
|
|
|
}
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
2012-04-25 01:56:43 +08:00
|
|
|
}
|
|
|
|
|
2013-08-30 12:36:57 +08:00
|
|
|
/// Update the PressureDiff array for liveness after scheduling this
|
|
|
|
/// instruction.
|
2016-01-20 08:23:26 +08:00
|
|
|
void ScheduleDAGMILive::updatePressureDiffs(
|
|
|
|
ArrayRef<RegisterMaskPair> LiveUses) {
|
|
|
|
for (const RegisterMaskPair &P : LiveUses) {
|
|
|
|
unsigned Reg = P.RegUnit;
|
2016-01-20 08:23:32 +08:00
|
|
|
/// FIXME: Currently assuming single-use physregs.
|
2019-08-02 07:27:28 +08:00
|
|
|
if (!Register::isVirtualRegister(Reg))
|
2013-08-30 12:36:57 +08:00
|
|
|
continue;
|
2013-09-07 01:32:39 +08:00
|
|
|
|
2016-01-20 08:23:32 +08:00
|
|
|
if (ShouldTrackLaneMasks) {
|
|
|
|
// If the register has just become live then other uses won't change
|
|
|
|
// this fact anymore => decrement pressure.
|
|
|
|
// If the register has just become dead then other uses make it come
|
|
|
|
// back to life => increment pressure.
|
2016-12-17 03:11:56 +08:00
|
|
|
bool Decrement = P.LaneMask.any();
|
2016-01-20 08:23:32 +08:00
|
|
|
|
|
|
|
for (const VReg2SUnit &V2SU
|
|
|
|
: make_range(VRegUses.find(Reg), VRegUses.end())) {
|
|
|
|
SUnit &SU = *V2SU.SU;
|
|
|
|
if (SU.isScheduled || &SU == &ExitSU)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
PressureDiff &PDiff = getPressureDiff(&SU);
|
2017-02-25 05:56:16 +08:00
|
|
|
PDiff.addPressureChange(Reg, Decrement, &MRI);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
|
|
|
|
<< printReg(Reg, TRI) << ':'
|
|
|
|
<< PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
|
|
|
|
dbgs() << " to "; PDiff.dump(*TRI););
|
2016-01-20 08:23:32 +08:00
|
|
|
}
|
|
|
|
} else {
|
2016-12-17 03:11:56 +08:00
|
|
|
assert(P.LaneMask.any());
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
|
2016-01-20 08:23:32 +08:00
|
|
|
// This may be called before CurrentBottom has been initialized. However,
|
|
|
|
// BotRPTracker must have a valid position. We want the value live into the
|
|
|
|
// instruction or live out of the block, so ask for the previous
|
|
|
|
// instruction's live-out.
|
|
|
|
const LiveInterval &LI = LIS->getInterval(Reg);
|
|
|
|
VNInfo *VNI;
|
|
|
|
MachineBasicBlock::const_iterator I =
|
|
|
|
nextIfDebug(BotRPTracker.getPos(), BB->end());
|
|
|
|
if (I == BB->end())
|
|
|
|
VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
|
|
|
|
else {
|
2016-02-27 14:40:41 +08:00
|
|
|
LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
|
2016-01-20 08:23:32 +08:00
|
|
|
VNI = LRQ.valueIn();
|
|
|
|
}
|
|
|
|
// RegisterPressureTracker guarantees that readsReg is true for LiveUses.
|
|
|
|
assert(VNI && "No live value at use.");
|
|
|
|
for (const VReg2SUnit &V2SU
|
|
|
|
: make_range(VRegUses.find(Reg), VRegUses.end())) {
|
|
|
|
SUnit *SU = V2SU.SU;
|
|
|
|
// If this use comes before the reaching def, it cannot be a last use,
|
|
|
|
// so decrease its pressure change.
|
|
|
|
if (!SU->isScheduled && SU != &ExitSU) {
|
2016-02-27 14:40:41 +08:00
|
|
|
LiveQueryResult LRQ =
|
|
|
|
LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
|
2016-01-20 08:23:32 +08:00
|
|
|
if (LRQ.valueIn() == VNI) {
|
|
|
|
PressureDiff &PDiff = getPressureDiff(SU);
|
2017-02-25 05:56:16 +08:00
|
|
|
PDiff.addPressureChange(Reg, true, &MRI);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
|
|
|
|
<< *SU->getInstr();
|
|
|
|
dbgs() << " to "; PDiff.dump(*TRI););
|
2016-01-20 08:23:32 +08:00
|
|
|
}
|
2015-11-07 04:59:02 +08:00
|
|
|
}
|
2013-08-30 12:36:57 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-19 08:23:35 +08:00
|
|
|
void ScheduleDAGMILive::dump() const {
|
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
2020-09-21 19:18:39 +08:00
|
|
|
if (EntrySU.getInstr() != nullptr)
|
|
|
|
dumpNodeAll(EntrySU);
|
2018-09-19 08:23:35 +08:00
|
|
|
for (const SUnit &SU : SUnits) {
|
|
|
|
dumpNodeAll(SU);
|
|
|
|
if (ShouldTrackPressure) {
|
|
|
|
dbgs() << " Pressure Diff : ";
|
|
|
|
getPressureDiff(&SU).dump(*TRI);
|
|
|
|
}
|
|
|
|
dbgs() << " Single Issue : ";
|
|
|
|
if (SchedModel.mustBeginGroup(SU.getInstr()) &&
|
|
|
|
SchedModel.mustEndGroup(SU.getInstr()))
|
|
|
|
dbgs() << "true;";
|
|
|
|
else
|
|
|
|
dbgs() << "false;";
|
|
|
|
dbgs() << '\n';
|
|
|
|
}
|
|
|
|
if (ExitSU.getInstr() != nullptr)
|
|
|
|
dumpNodeAll(ExitSU);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
/// schedule - Called back from MachineScheduler::runOnMachineFunction
|
2012-04-25 01:56:43 +08:00
|
|
|
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
|
|
|
|
/// only includes instructions that have DAG nodes, not scheduling boundaries.
|
2012-09-11 08:39:15 +08:00
|
|
|
///
|
|
|
|
/// This is a skeletal driver, with all the functionality pushed into helpers,
|
2015-08-19 06:41:58 +08:00
|
|
|
/// so that it can be easily extended by experimental schedulers. Generally,
|
2012-09-11 08:39:15 +08:00
|
|
|
/// implementing MachineSchedStrategy should be sufficient to implement a new
|
|
|
|
/// scheduling algorithm. However, if a scheduler further subclasses
|
2013-12-29 05:56:47 +08:00
|
|
|
/// ScheduleDAGMILive then it will want to override this virtual method in order
|
|
|
|
/// to update any specialized state.
|
|
|
|
void ScheduleDAGMILive::schedule() {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
|
|
|
|
LLVM_DEBUG(SchedImpl->dumpPolicy());
|
2012-09-11 08:39:15 +08:00
|
|
|
buildDAGWithRegPressure();
|
|
|
|
|
2012-09-15 01:22:42 +08:00
|
|
|
postprocessDAG();
|
|
|
|
|
2013-01-25 14:33:57 +08:00
|
|
|
SmallVector<SUnit*, 8> TopRoots, BotRoots;
|
|
|
|
findRootsAndBiasEdges(TopRoots, BotRoots);
|
|
|
|
|
|
|
|
// Initialize the strategy before modifying the DAG.
|
|
|
|
// This may initialize a DFSResult to be used for queue priority.
|
|
|
|
SchedImpl->initialize(this);
|
|
|
|
|
2018-09-19 08:23:35 +08:00
|
|
|
LLVM_DEBUG(dump());
|
2018-09-20 04:50:49 +08:00
|
|
|
if (PrintDAGs) dump();
|
2013-01-25 14:33:57 +08:00
|
|
|
if (ViewMISchedDAGs) viewGraph();
|
2012-09-11 08:39:15 +08:00
|
|
|
|
2013-01-25 14:33:57 +08:00
|
|
|
// Initialize ready queues now that the DAG and priority data are finalized.
|
|
|
|
initQueues(TopRoots, BotRoots);
|
2012-09-11 08:39:15 +08:00
|
|
|
|
|
|
|
bool IsTopNode = false;
|
2015-09-19 02:52:20 +08:00
|
|
|
while (true) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
|
2015-09-19 02:52:20 +08:00
|
|
|
SUnit *SU = SchedImpl->pickNode(IsTopNode);
|
|
|
|
if (!SU) break;
|
|
|
|
|
2012-10-09 02:53:53 +08:00
|
|
|
assert(!SU->isScheduled && "Node already scheduled");
|
2012-09-11 08:39:15 +08:00
|
|
|
if (!checkSchedLimit())
|
|
|
|
break;
|
|
|
|
|
|
|
|
scheduleMI(SU, IsTopNode);
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
if (DFSResult) {
|
|
|
|
unsigned SubtreeID = DFSResult->getSubtreeID(SU);
|
|
|
|
if (!ScheduledTrees.test(SubtreeID)) {
|
|
|
|
ScheduledTrees.set(SubtreeID);
|
|
|
|
DFSResult->scheduleTree(SubtreeID);
|
|
|
|
SchedImpl->scheduleTree(SubtreeID);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Notify the scheduling strategy after updating the DAG.
|
|
|
|
SchedImpl->schedNode(SU, IsTopNode);
|
2015-03-27 14:10:13 +08:00
|
|
|
|
|
|
|
updateQueues(SU, IsTopNode);
|
2012-09-11 08:39:15 +08:00
|
|
|
}
|
|
|
|
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
|
|
|
|
|
|
|
|
placeDebugValues();
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
2017-12-05 01:18:51 +08:00
|
|
|
dbgs() << "*** Final schedule for "
|
|
|
|
<< printMBBReference(*begin()->getParent()) << " ***\n";
|
|
|
|
dumpSchedule();
|
|
|
|
dbgs() << '\n';
|
|
|
|
});
|
2012-09-11 08:39:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Build the DAG and setup three register pressure trackers.
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::buildDAGWithRegPressure() {
|
2013-09-05 04:59:59 +08:00
|
|
|
if (!ShouldTrackPressure) {
|
|
|
|
RPTracker.reset();
|
|
|
|
RegionCriticalPSets.clear();
|
|
|
|
buildSchedGraph(AA);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-05-11 05:06:10 +08:00
|
|
|
// Initialize the register pressure tracker used by buildSchedGraph.
|
2013-07-31 03:59:12 +08:00
|
|
|
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
|
2016-01-20 08:23:32 +08:00
|
|
|
ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
|
2012-05-11 05:06:10 +08:00
|
|
|
|
|
|
|
// Account for liveness generate by the region boundary.
|
|
|
|
if (LiveRegionEnd != RegionEnd)
|
|
|
|
RPTracker.recede();
|
2012-04-25 01:56:43 +08:00
|
|
|
|
2012-05-11 05:06:10 +08:00
|
|
|
// Build the DAG, and compute current register pressure.
|
2016-01-20 08:23:32 +08:00
|
|
|
buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
|
2012-01-17 14:55:07 +08:00
|
|
|
|
2012-05-11 05:06:10 +08:00
|
|
|
// Initialize top/bottom trackers after computing region pressure.
|
|
|
|
initRegPressure();
|
2012-09-11 08:39:15 +08:00
|
|
|
}
|
2012-05-11 05:06:10 +08:00
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::computeDFSResult() {
|
2013-01-25 12:01:04 +08:00
|
|
|
if (!DFSResult)
|
|
|
|
DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
|
|
|
|
DFSResult->clear();
|
|
|
|
ScheduledTrees.clear();
|
2013-01-25 14:33:57 +08:00
|
|
|
DFSResult->resize(SUnits.size());
|
|
|
|
DFSResult->compute(SUnits);
|
2013-01-25 12:01:04 +08:00
|
|
|
ScheduledTrees.resize(DFSResult->getNumSubtrees());
|
|
|
|
}
|
|
|
|
|
2013-08-30 02:04:49 +08:00
|
|
|
/// Compute the max cyclic critical path through the DAG. The scheduling DAG
|
|
|
|
/// only provides the critical path for single block loops. To handle loops that
|
|
|
|
/// span blocks, we could use the vreg path latencies provided by
|
|
|
|
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
|
|
|
|
/// available for use in the scheduler.
|
|
|
|
///
|
|
|
|
/// The cyclic path estimation identifies a def-use pair that crosses the back
|
2013-08-30 10:02:12 +08:00
|
|
|
/// edge and considers the depth and height of the nodes. For example, consider
|
2013-08-30 02:04:49 +08:00
|
|
|
/// the following instruction sequence where each instruction has unit latency
|
2020-08-04 04:09:46 +08:00
|
|
|
/// and defines an eponymous virtual register:
|
2013-08-30 02:04:49 +08:00
|
|
|
///
|
|
|
|
/// a->b(a,c)->c(b)->d(c)->exit
|
|
|
|
///
|
|
|
|
/// The cyclic critical path is a two cycles: b->c->b
|
|
|
|
/// The acyclic critical path is four cycles: a->b->c->d->exit
|
|
|
|
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
|
|
|
|
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
|
|
|
|
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
|
|
|
|
/// LiveInDepth = depth(b) = len(a->b) = 1
|
|
|
|
///
|
|
|
|
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
|
|
|
|
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
|
|
|
|
/// CyclicCriticalPath = min(2, 2) = 2
|
2013-12-29 05:56:47 +08:00
|
|
|
///
|
|
|
|
/// This could be relevant to PostRA scheduling, but is currently implemented
|
|
|
|
/// assuming LiveIntervals.
|
|
|
|
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
|
2013-08-30 02:04:49 +08:00
|
|
|
// This only applies to single block loop.
|
|
|
|
if (!BB->isSuccessor(BB))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
unsigned MaxCyclicLatency = 0;
|
|
|
|
// Visit each live out vreg def to find def/use pairs that cross iterations.
|
2016-01-20 08:23:26 +08:00
|
|
|
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
|
|
|
|
unsigned Reg = P.RegUnit;
|
2019-08-02 07:27:28 +08:00
|
|
|
if (!Register::isVirtualRegister(Reg))
|
|
|
|
continue;
|
2013-08-30 02:04:49 +08:00
|
|
|
const LiveInterval &LI = LIS->getInterval(Reg);
|
|
|
|
const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
|
|
|
|
if (!DefVNI)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
|
|
|
|
const SUnit *DefSU = getSUnit(DefMI);
|
|
|
|
if (!DefSU)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
unsigned LiveOutHeight = DefSU->getHeight();
|
|
|
|
unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
|
|
|
|
// Visit all local users of the vreg def.
|
2015-10-29 11:57:17 +08:00
|
|
|
for (const VReg2SUnit &V2SU
|
|
|
|
: make_range(VRegUses.find(Reg), VRegUses.end())) {
|
|
|
|
SUnit *SU = V2SU.SU;
|
|
|
|
if (SU == &ExitSU)
|
2013-08-30 02:04:49 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// Only consider uses of the phi.
|
2016-02-27 14:40:41 +08:00
|
|
|
LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
|
2013-08-30 02:04:49 +08:00
|
|
|
if (!LRQ.valueIn()->isPHIDef())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Assume that a path spanning two iterations is a cycle, which could
|
|
|
|
// overestimate in strange cases. This allows cyclic latency to be
|
|
|
|
// estimated as the minimum slack of the vreg's depth or height.
|
|
|
|
unsigned CyclicLatency = 0;
|
2015-10-29 11:57:17 +08:00
|
|
|
if (LiveOutDepth > SU->getDepth())
|
|
|
|
CyclicLatency = LiveOutDepth - SU->getDepth();
|
2013-08-30 02:04:49 +08:00
|
|
|
|
2015-10-29 11:57:17 +08:00
|
|
|
unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
|
2013-08-30 02:04:49 +08:00
|
|
|
if (LiveInHeight > LiveOutHeight) {
|
|
|
|
if (LiveInHeight - LiveOutHeight < CyclicLatency)
|
|
|
|
CyclicLatency = LiveInHeight - LiveOutHeight;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else
|
2013-08-30 02:04:49 +08:00
|
|
|
CyclicLatency = 0;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
|
|
|
|
<< SU->NodeNum << ") = " << CyclicLatency << "c\n");
|
2013-08-30 02:04:49 +08:00
|
|
|
if (CyclicLatency > MaxCyclicLatency)
|
|
|
|
MaxCyclicLatency = CyclicLatency;
|
|
|
|
}
|
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
|
2013-08-30 02:04:49 +08:00
|
|
|
return MaxCyclicLatency;
|
|
|
|
}
|
|
|
|
|
2016-04-29 03:17:44 +08:00
|
|
|
/// Release ExitSU predecessors and setup scheduler queues. Re-position
|
|
|
|
/// the Top RP tracker in case the region beginning has changed.
|
|
|
|
void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
|
|
|
|
ArrayRef<SUnit*> BotRoots) {
|
|
|
|
ScheduleDAGMI::initQueues(TopRoots, BotRoots);
|
|
|
|
if (ShouldTrackPressure) {
|
|
|
|
assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
|
|
|
|
TopRPTracker.setPos(CurrentTop);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-11 08:39:15 +08:00
|
|
|
/// Move an instruction and update register pressure.
|
2013-12-29 05:56:47 +08:00
|
|
|
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
|
2012-09-11 08:39:15 +08:00
|
|
|
// Move the instruction to its new location in the instruction stream.
|
|
|
|
MachineInstr *MI = SU->getInstr();
|
2012-05-11 05:06:10 +08:00
|
|
|
|
2012-09-11 08:39:15 +08:00
|
|
|
if (IsTopNode) {
|
|
|
|
assert(SU->isTopReady() && "node still has unscheduled dependencies");
|
|
|
|
if (&*CurrentTop == MI)
|
|
|
|
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
|
|
|
|
else {
|
|
|
|
moveInstruction(MI, CurrentTop);
|
|
|
|
TopRPTracker.setPos(MI);
|
2012-03-14 12:00:41 +08:00
|
|
|
}
|
2012-09-11 08:39:15 +08:00
|
|
|
|
2013-09-05 04:59:59 +08:00
|
|
|
if (ShouldTrackPressure) {
|
|
|
|
// Update top scheduled pressure.
|
2016-01-20 08:23:32 +08:00
|
|
|
RegisterOperands RegOpers;
|
|
|
|
RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
|
|
|
|
if (ShouldTrackLaneMasks) {
|
|
|
|
// Adjust liveness and add missing dead+read-undef flags.
|
2016-02-27 14:40:41 +08:00
|
|
|
SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
|
2016-01-20 08:23:32 +08:00
|
|
|
RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
|
|
|
|
} else {
|
|
|
|
// Adjust for missing dead-def flags.
|
|
|
|
RegOpers.detectDeadDefs(*MI, *LIS);
|
|
|
|
}
|
|
|
|
|
|
|
|
TopRPTracker.advance(RegOpers);
|
2013-09-05 04:59:59 +08:00
|
|
|
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
|
|
|
|
TopRPTracker.getRegSetPressureAtPos(), TRI););
|
2015-11-07 04:59:02 +08:00
|
|
|
|
2013-09-07 01:32:47 +08:00
|
|
|
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
|
2013-09-05 04:59:59 +08:00
|
|
|
}
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2012-09-11 08:39:15 +08:00
|
|
|
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
|
|
|
|
MachineBasicBlock::iterator priorII =
|
|
|
|
priorNonDebug(CurrentBottom, CurrentTop);
|
|
|
|
if (&*priorII == MI)
|
|
|
|
CurrentBottom = priorII;
|
2012-01-17 14:55:07 +08:00
|
|
|
else {
|
2012-09-11 08:39:15 +08:00
|
|
|
if (&*CurrentTop == MI) {
|
|
|
|
CurrentTop = nextIfDebug(++CurrentTop, priorII);
|
|
|
|
TopRPTracker.setPos(CurrentTop);
|
2012-03-14 12:00:41 +08:00
|
|
|
}
|
2012-09-11 08:39:15 +08:00
|
|
|
moveInstruction(MI, CurrentBottom);
|
|
|
|
CurrentBottom = MI;
|
2018-01-24 00:04:53 +08:00
|
|
|
BotRPTracker.setPos(CurrentBottom);
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
2013-09-05 04:59:59 +08:00
|
|
|
if (ShouldTrackPressure) {
|
2016-01-20 08:23:32 +08:00
|
|
|
RegisterOperands RegOpers;
|
|
|
|
RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
|
|
|
|
if (ShouldTrackLaneMasks) {
|
|
|
|
// Adjust liveness and add missing dead+read-undef flags.
|
2016-02-27 14:40:41 +08:00
|
|
|
SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
|
2016-01-20 08:23:32 +08:00
|
|
|
RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
|
|
|
|
} else {
|
|
|
|
// Adjust for missing dead-def flags.
|
|
|
|
RegOpers.detectDeadDefs(*MI, *LIS);
|
|
|
|
}
|
|
|
|
|
2017-12-15 11:56:57 +08:00
|
|
|
if (BotRPTracker.getPos() != CurrentBottom)
|
|
|
|
BotRPTracker.recedeSkipDebugValues();
|
2016-01-20 08:23:26 +08:00
|
|
|
SmallVector<RegisterMaskPair, 8> LiveUses;
|
2016-01-20 08:23:32 +08:00
|
|
|
BotRPTracker.recede(RegOpers, &LiveUses);
|
2013-09-05 04:59:59 +08:00
|
|
|
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
|
|
|
|
BotRPTracker.getRegSetPressureAtPos(), TRI););
|
2015-11-07 04:59:02 +08:00
|
|
|
|
2013-09-07 01:32:47 +08:00
|
|
|
updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
|
2013-09-05 04:59:59 +08:00
|
|
|
updatePressureDiffs(LiveUses);
|
|
|
|
}
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
2012-09-11 08:39:15 +08:00
|
|
|
}
|
2012-04-25 02:04:37 +08:00
|
|
|
|
2012-11-13 03:52:20 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2016-04-15 22:58:38 +08:00
|
|
|
// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
|
2012-11-13 03:52:20 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2012-11-13 03:40:10 +08:00
|
|
|
namespace {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Post-process the DAG to create cluster edges between neighboring
|
2016-04-15 22:58:38 +08:00
|
|
|
/// loads or between neighboring stores.
|
|
|
|
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
|
|
|
|
struct MemOpInfo {
|
2012-11-13 03:40:10 +08:00
|
|
|
SUnit *SU;
|
2020-01-06 19:22:51 +08:00
|
|
|
SmallVector<const MachineOperand *, 4> BaseOps;
|
2016-03-10 00:00:35 +08:00
|
|
|
int64_t Offset;
|
[AMDGPU/MemOpsCluster] Let mem ops clustering logic also consider number of clustered bytes
Summary:
While clustering mem ops, AMDGPU target needs to consider number of clustered bytes
to decide on max number of mem ops that can be clustered. This patch adds support to pass
number of clustered bytes to target mem ops clustering logic.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80545
2020-06-02 01:20:29 +08:00
|
|
|
unsigned Width;
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2020-01-06 19:22:51 +08:00
|
|
|
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
|
[AMDGPU/MemOpsCluster] Let mem ops clustering logic also consider number of clustered bytes
Summary:
While clustering mem ops, AMDGPU target needs to consider number of clustered bytes
to decide on max number of mem ops that can be clustered. This patch adds support to pass
number of clustered bytes to target mem ops clustering logic.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80545
2020-06-02 01:20:29 +08:00
|
|
|
int64_t Offset, unsigned Width)
|
|
|
|
: SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
|
|
|
|
Width(Width) {}
|
2020-01-06 19:22:51 +08:00
|
|
|
|
|
|
|
static bool Compare(const MachineOperand *const &A,
|
|
|
|
const MachineOperand *const &B) {
|
|
|
|
if (A->getType() != B->getType())
|
|
|
|
return A->getType() < B->getType();
|
|
|
|
if (A->isReg())
|
|
|
|
return A->getReg() < B->getReg();
|
|
|
|
if (A->isFI()) {
|
|
|
|
const MachineFunction &MF = *A->getParent()->getParent()->getParent();
|
2018-11-30 04:03:19 +08:00
|
|
|
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
|
|
|
|
bool StackGrowsDown = TFI.getStackGrowthDirection() ==
|
|
|
|
TargetFrameLowering::StackGrowsDown;
|
2020-01-06 19:22:51 +08:00
|
|
|
return StackGrowsDown ? A->getIndex() > B->getIndex()
|
|
|
|
: A->getIndex() < B->getIndex();
|
2018-11-30 04:03:19 +08:00
|
|
|
}
|
2018-11-28 20:00:28 +08:00
|
|
|
|
|
|
|
llvm_unreachable("MemOpClusterMutation only supports register or frame "
|
|
|
|
"index bases.");
|
2014-03-08 05:35:39 +08:00
|
|
|
}
|
2020-01-06 19:22:51 +08:00
|
|
|
|
|
|
|
bool operator<(const MemOpInfo &RHS) const {
|
|
|
|
// FIXME: Don't compare everything twice. Maybe use C++20 three way
|
|
|
|
// comparison instead when it's available.
|
|
|
|
if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
|
|
|
|
RHS.BaseOps.begin(), RHS.BaseOps.end(),
|
|
|
|
Compare))
|
|
|
|
return true;
|
|
|
|
if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
|
|
|
|
BaseOps.begin(), BaseOps.end(), Compare))
|
|
|
|
return false;
|
|
|
|
if (Offset != RHS.Offset)
|
|
|
|
return Offset < RHS.Offset;
|
|
|
|
return SU->NodeNum < RHS.SU->NodeNum;
|
|
|
|
}
|
2012-11-13 03:40:10 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
const TargetInstrInfo *TII;
|
|
|
|
const TargetRegisterInfo *TRI;
|
2016-04-15 22:58:38 +08:00
|
|
|
bool IsLoad;
|
|
|
|
|
2012-11-13 03:40:10 +08:00
|
|
|
public:
|
2016-04-15 22:58:38 +08:00
|
|
|
BaseMemOpClusterMutation(const TargetInstrInfo *tii,
|
|
|
|
const TargetRegisterInfo *tri, bool IsLoad)
|
|
|
|
: TII(tii), TRI(tri), IsLoad(IsLoad) {}
|
2012-11-13 03:40:10 +08:00
|
|
|
|
2016-03-05 23:45:23 +08:00
|
|
|
void apply(ScheduleDAGInstrs *DAGInstrs) override;
|
2016-04-15 22:58:38 +08:00
|
|
|
|
2012-11-13 03:40:10 +08:00
|
|
|
protected:
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps,
|
|
|
|
ScheduleDAGInstrs *DAG);
|
|
|
|
void collectMemOpRecords(std::vector<SUnit> &SUnits,
|
|
|
|
SmallVectorImpl<MemOpInfo> &MemOpRecords);
|
2016-04-15 22:58:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
class StoreClusterMutation : public BaseMemOpClusterMutation {
|
|
|
|
public:
|
|
|
|
StoreClusterMutation(const TargetInstrInfo *tii,
|
|
|
|
const TargetRegisterInfo *tri)
|
|
|
|
: BaseMemOpClusterMutation(tii, tri, false) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
class LoadClusterMutation : public BaseMemOpClusterMutation {
|
|
|
|
public:
|
|
|
|
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
|
|
|
|
: BaseMemOpClusterMutation(tii, tri, true) {}
|
2012-11-13 03:40:10 +08:00
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end anonymous namespace
|
2012-11-13 03:40:10 +08:00
|
|
|
|
2016-08-20 03:59:18 +08:00
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
std::unique_ptr<ScheduleDAGMutation>
|
|
|
|
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
|
|
|
|
const TargetRegisterInfo *TRI) {
|
2019-08-15 23:54:37 +08:00
|
|
|
return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
|
2016-11-29 04:11:54 +08:00
|
|
|
: nullptr;
|
2016-08-20 03:59:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unique_ptr<ScheduleDAGMutation>
|
|
|
|
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
|
|
|
|
const TargetRegisterInfo *TRI) {
|
2019-08-15 23:54:37 +08:00
|
|
|
return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
|
2016-11-29 04:11:54 +08:00
|
|
|
: nullptr;
|
2016-08-20 03:59:18 +08:00
|
|
|
}
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
} // end namespace llvm
|
2016-08-20 03:59:18 +08:00
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
// Sorting all the loads/stores first, then for each load/store, checking the
|
|
|
|
// following load/store one by one, until reach the first non-dependent one and
|
|
|
|
// call target hook to see if they can cluster.
|
2016-04-15 22:58:38 +08:00
|
|
|
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
ArrayRef<MemOpInfo> MemOpRecords, ScheduleDAGInstrs *DAG) {
|
|
|
|
// Keep track of the current cluster length and bytes for each SUnit.
|
|
|
|
DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
|
|
|
|
// At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
|
|
|
|
// cluster mem ops collected within `MemOpRecords` array.
|
2016-04-15 22:58:38 +08:00
|
|
|
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
// Decision to cluster mem ops is taken based on target dependent logic
|
|
|
|
auto MemOpa = MemOpRecords[Idx];
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
|
|
|
|
// Seek for the next load/store to do the cluster.
|
|
|
|
unsigned NextIdx = Idx + 1;
|
|
|
|
for (; NextIdx < End; ++NextIdx)
|
|
|
|
// Skip if MemOpb has been clustered already or has dependency with
|
|
|
|
// MemOpa.
|
|
|
|
if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
|
|
|
|
!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
|
|
|
|
!DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))
|
|
|
|
break;
|
|
|
|
if (NextIdx == End)
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
continue;
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
|
|
|
|
auto MemOpb = MemOpRecords[NextIdx];
|
|
|
|
unsigned ClusterLength = 2;
|
|
|
|
unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
|
|
|
|
if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
|
|
|
|
ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
|
|
|
|
CurrentClusterBytes =
|
|
|
|
SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
}
|
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
|
|
|
|
CurrentClusterBytes))
|
|
|
|
continue;
|
|
|
|
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
SUnit *SUa = MemOpa.SU;
|
|
|
|
SUnit *SUb = MemOpb.SU;
|
|
|
|
if (SUa->NodeNum > SUb->NodeNum)
|
|
|
|
std::swap(SUa, SUb);
|
|
|
|
|
|
|
|
// FIXME: Is this check really required?
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
|
|
|
|
<< SUb->NodeNum << ")\n");
|
2020-08-07 15:09:48 +08:00
|
|
|
++NumClustered;
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
|
2020-08-07 12:53:37 +08:00
|
|
|
if (IsLoad) {
|
|
|
|
// Copy successor edges from SUa to SUb. Interleaving computation
|
|
|
|
// dependent on SUa can prevent load combining due to register reuse.
|
|
|
|
// Predecessor edges do not need to be copied from SUb to SUa since
|
|
|
|
// nearby loads should have effectively the same inputs.
|
|
|
|
for (const SDep &Succ : SUa->Succs) {
|
|
|
|
if (Succ.getSUnit() == SUb)
|
|
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
|
|
|
|
<< ")\n");
|
|
|
|
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Copy predecessor edges from SUb to SUa to avoid the SUnits that
|
|
|
|
// SUb dependent on scheduled in-between SUb and SUa. Successor edges
|
|
|
|
// do not need to be copied from SUa to SUb since no one will depend
|
|
|
|
// on stores.
|
|
|
|
// Notice that, we don't need to care about the memory dependency as
|
|
|
|
// we won't try to cluster them if they have any memory dependency.
|
|
|
|
for (const SDep &Pred : SUb->Preds) {
|
|
|
|
if (Pred.getSUnit() == SUa)
|
|
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
|
|
|
|
<< ")\n");
|
|
|
|
DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
|
|
|
|
}
|
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within
the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.
Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar
Reviewed By: foad
Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80119
2020-05-26 18:17:03 +08:00
|
|
|
}
|
[AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction class.
Summary:
`width` computation is missing for newly added `MIMG`
instruction class. Add it.
Reviewers: foad, rampitec, arsenm
Reviewed By: foad
Subscribers: MatzeB, javed.absar, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81649
2020-06-23 18:49:34 +08:00
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
|
|
|
|
CurrentClusterBytes};
|
|
|
|
|
[AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction class.
Summary:
`width` computation is missing for newly added `MIMG`
instruction class. Add it.
Reviewers: foad, rampitec, arsenm
Reviewed By: foad
Subscribers: MatzeB, javed.absar, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81649
2020-06-23 18:49:34 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
|
|
|
|
<< ", Curr cluster bytes: " << CurrentClusterBytes
|
|
|
|
<< "\n");
|
2012-11-13 03:40:10 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
void BaseMemOpClusterMutation::collectMemOpRecords(
|
|
|
|
std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
|
|
|
|
for (auto &SU : SUnits) {
|
2017-06-21 17:10:10 +08:00
|
|
|
if ((IsLoad && !SU.getInstr()->mayLoad()) ||
|
|
|
|
(!IsLoad && !SU.getInstr()->mayStore()))
|
2012-11-13 03:40:10 +08:00
|
|
|
continue;
|
2016-04-15 22:58:38 +08:00
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
const MachineInstr &MI = *SU.getInstr();
|
|
|
|
SmallVector<const MachineOperand *, 4> BaseOps;
|
|
|
|
int64_t Offset;
|
|
|
|
bool OffsetIsScalable;
|
|
|
|
unsigned Width;
|
|
|
|
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
|
|
|
|
OffsetIsScalable, Width, TRI)) {
|
|
|
|
MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
|
|
|
|
|
|
|
|
LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
|
|
|
|
<< Offset << ", OffsetIsScalable: " << OffsetIsScalable
|
|
|
|
<< ", Width: " << Width << "\n");
|
2012-11-13 03:40:10 +08:00
|
|
|
}
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
for (auto *Op : BaseOps)
|
|
|
|
assert(Op);
|
|
|
|
#endif
|
2012-11-13 03:40:10 +08:00
|
|
|
}
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Callback from DAG postProcessing to create cluster edges for loads/stores.
|
|
|
|
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
|
|
|
|
// Collect all the clusterable loads/stores
|
|
|
|
SmallVector<MemOpInfo, 32> MemOpRecords;
|
|
|
|
collectMemOpRecords(DAG->SUnits, MemOpRecords);
|
|
|
|
|
|
|
|
if (MemOpRecords.size() < 2)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Sorting the loads/stores, so that, we can stop the cluster as early as
|
|
|
|
// possible.
|
|
|
|
llvm::sort(MemOpRecords);
|
2016-04-15 22:58:38 +08:00
|
|
|
|
[Scheduling] Implement a new way to cluster loads/stores
Before calling target hook to determine if two loads/stores are clusterable,
we put them into different groups to avoid fake cluster due to dependency.
For now, we are putting the loads/stores into the same group if they have
the same predecessor. We assume that, if two loads/stores have the same
predecessor, it is likely that, they didn't have dependency for each other.
However, one SUnit might have several predecessors and for now, we just
pick up the first predecessor that has non-data/non-artificial dependency,
which is too arbitrary. And we are struggling to fix it.
So, I am proposing some better implementation.
1. Collect all the loads/stores that has memory info first to reduce the complexity.
2. Sort these loads/stores so that we can stop the seeking as early as possible.
3. For each load/store, seeking for the first non-dependency instruction with the
sorted order, and check if they can cluster or not.
Reviewed By: Jay Foad
Differential Revision: https://reviews.llvm.org/D85517
2020-08-26 20:26:21 +08:00
|
|
|
// Trying to cluster all the neighboring loads/stores.
|
|
|
|
clusterNeighboringMemOps(MemOpRecords, DAG);
|
2012-11-13 03:40:10 +08:00
|
|
|
}
|
|
|
|
|
2013-04-24 23:54:43 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// CopyConstrain - DAG post-processing to encourage copy elimination.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
namespace {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Post-process the DAG to create weak edges from all uses of a copy to
|
2013-04-24 23:54:43 +08:00
|
|
|
/// the one use that defines the copy's source vreg, most likely an induction
|
|
|
|
/// variable increment.
|
|
|
|
class CopyConstrain : public ScheduleDAGMutation {
|
|
|
|
// Transient state.
|
|
|
|
SlotIndex RegionBeginIdx;
|
2017-09-12 07:00:48 +08:00
|
|
|
|
2013-04-25 07:19:56 +08:00
|
|
|
// RegionEndIdx is the slot index of the last non-debug instruction in the
|
|
|
|
// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
|
2013-04-24 23:54:43 +08:00
|
|
|
SlotIndex RegionEndIdx;
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2013-04-24 23:54:43 +08:00
|
|
|
public:
|
|
|
|
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
|
|
|
|
|
2016-03-05 23:45:23 +08:00
|
|
|
void apply(ScheduleDAGInstrs *DAGInstrs) override;
|
2013-04-24 23:54:43 +08:00
|
|
|
|
|
|
|
protected:
|
2013-12-29 05:56:47 +08:00
|
|
|
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
|
2013-04-24 23:54:43 +08:00
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end anonymous namespace
|
2013-04-24 23:54:43 +08:00
|
|
|
|
2016-08-20 03:59:18 +08:00
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
std::unique_ptr<ScheduleDAGMutation>
|
|
|
|
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
|
2017-02-23 06:32:51 +08:00
|
|
|
const TargetRegisterInfo *TRI) {
|
2019-08-15 23:54:37 +08:00
|
|
|
return std::make_unique<CopyConstrain>(TII, TRI);
|
2016-08-20 03:59:18 +08:00
|
|
|
}
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
} // end namespace llvm
|
2016-08-20 03:59:18 +08:00
|
|
|
|
2013-04-24 23:54:43 +08:00
|
|
|
/// constrainLocalCopy handles two possibilities:
|
|
|
|
/// 1) Local src:
|
|
|
|
/// I0: = dst
|
|
|
|
/// I1: src = ...
|
|
|
|
/// I2: = dst
|
|
|
|
/// I3: dst = src (copy)
|
|
|
|
/// (create pred->succ edges I0->I1, I2->I1)
|
|
|
|
///
|
|
|
|
/// 2) Local copy:
|
|
|
|
/// I0: dst = src (copy)
|
|
|
|
/// I1: = dst
|
|
|
|
/// I2: src = ...
|
|
|
|
/// I3: = dst
|
|
|
|
/// (create pred->succ edges I1->I2, I3->I2)
|
|
|
|
///
|
|
|
|
/// Although the MachineScheduler is currently constrained to single blocks,
|
|
|
|
/// this algorithm should handle extended blocks. An EBB is a set of
|
|
|
|
/// contiguously numbered blocks such that the previous block in the EBB is
|
|
|
|
/// always the single predecessor.
|
2013-12-29 05:56:47 +08:00
|
|
|
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
|
2013-04-24 23:54:43 +08:00
|
|
|
LiveIntervals *LIS = DAG->getLIS();
|
|
|
|
MachineInstr *Copy = CopySU->getInstr();
|
|
|
|
|
|
|
|
// Check for pure vreg copies.
|
2016-04-05 05:23:46 +08:00
|
|
|
const MachineOperand &SrcOp = Copy->getOperand(1);
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register SrcReg = SrcOp.getReg();
|
2019-08-02 07:27:28 +08:00
|
|
|
if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
|
2013-04-24 23:54:43 +08:00
|
|
|
return;
|
|
|
|
|
2016-04-05 05:23:46 +08:00
|
|
|
const MachineOperand &DstOp = Copy->getOperand(0);
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = DstOp.getReg();
|
2019-08-02 07:27:28 +08:00
|
|
|
if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
|
2013-04-24 23:54:43 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if either the dest or source is local. If it's live across a back
|
|
|
|
// edge, it's not local. Note that if both vregs are live across the back
|
|
|
|
// edge, we cannot successfully contrain the copy without cyclic scheduling.
|
2015-01-19 15:30:47 +08:00
|
|
|
// If both the copy's source and dest are local live intervals, then we
|
|
|
|
// should treat the dest as the global for the purpose of adding
|
|
|
|
// constraints. This adds edges from source's other uses to the copy.
|
|
|
|
unsigned LocalReg = SrcReg;
|
|
|
|
unsigned GlobalReg = DstReg;
|
2013-04-24 23:54:43 +08:00
|
|
|
LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
|
|
|
|
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
|
2015-01-19 15:30:47 +08:00
|
|
|
LocalReg = DstReg;
|
|
|
|
GlobalReg = SrcReg;
|
2013-04-24 23:54:43 +08:00
|
|
|
LocalLI = &LIS->getInterval(LocalReg);
|
|
|
|
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
|
|
|
|
|
|
|
|
// Find the global segment after the start of the local LI.
|
|
|
|
LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
|
|
|
|
// If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
|
|
|
|
// local live range. We could create edges from other global uses to the local
|
|
|
|
// start, but the coalescer should have already eliminated these cases, so
|
|
|
|
// don't bother dealing with it.
|
|
|
|
if (GlobalSegment == GlobalLI->end())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// If GlobalSegment is killed at the LocalLI->start, the call to find()
|
|
|
|
// returned the next global segment. But if GlobalSegment overlaps with
|
2018-06-20 13:29:26 +08:00
|
|
|
// LocalLI->start, then advance to the next segment. If a hole in GlobalLI
|
2013-04-24 23:54:43 +08:00
|
|
|
// exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
|
|
|
|
if (GlobalSegment->contains(LocalLI->beginIndex()))
|
|
|
|
++GlobalSegment;
|
|
|
|
|
|
|
|
if (GlobalSegment == GlobalLI->end())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if GlobalLI contains a hole in the vicinity of LocalLI.
|
|
|
|
if (GlobalSegment != GlobalLI->begin()) {
|
|
|
|
// Two address defs have no hole.
|
2014-03-02 20:27:27 +08:00
|
|
|
if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
|
2013-04-24 23:54:43 +08:00
|
|
|
GlobalSegment->start)) {
|
|
|
|
return;
|
|
|
|
}
|
2013-07-31 03:59:08 +08:00
|
|
|
// If the prior global segment may be defined by the same two-address
|
|
|
|
// instruction that also defines LocalLI, then can't make a hole here.
|
2014-03-02 20:27:27 +08:00
|
|
|
if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
|
2013-07-31 03:59:08 +08:00
|
|
|
LocalLI->beginIndex())) {
|
|
|
|
return;
|
|
|
|
}
|
2013-04-24 23:54:43 +08:00
|
|
|
// If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
|
|
|
|
// it would be a disconnected component in the live range.
|
2014-03-02 20:27:27 +08:00
|
|
|
assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
|
2013-04-24 23:54:43 +08:00
|
|
|
"Disconnected LRG within the scheduling region.");
|
|
|
|
}
|
|
|
|
MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
|
|
|
|
if (!GlobalDef)
|
|
|
|
return;
|
|
|
|
|
|
|
|
SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
|
|
|
|
if (!GlobalSU)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// GlobalDef is the bottom of the GlobalLI hole. Open the hole by
|
|
|
|
// constraining the uses of the last local def to precede GlobalDef.
|
|
|
|
SmallVector<SUnit*,8> LocalUses;
|
|
|
|
const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
|
|
|
|
MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
|
|
|
|
SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const SDep &Succ : LastLocalSU->Succs) {
|
|
|
|
if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
|
2013-04-24 23:54:43 +08:00
|
|
|
continue;
|
2017-06-21 17:10:10 +08:00
|
|
|
if (Succ.getSUnit() == GlobalSU)
|
2013-04-24 23:54:43 +08:00
|
|
|
continue;
|
2017-06-21 17:10:10 +08:00
|
|
|
if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
|
2013-04-24 23:54:43 +08:00
|
|
|
return;
|
2017-06-21 17:10:10 +08:00
|
|
|
LocalUses.push_back(Succ.getSUnit());
|
2013-04-24 23:54:43 +08:00
|
|
|
}
|
|
|
|
// Open the top of the GlobalLI hole by constraining any earlier global uses
|
|
|
|
// to precede the start of LocalLI.
|
|
|
|
SmallVector<SUnit*,8> GlobalUses;
|
|
|
|
MachineInstr *FirstLocalDef =
|
|
|
|
LIS->getInstructionFromIndex(LocalLI->beginIndex());
|
|
|
|
SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const SDep &Pred : GlobalSU->Preds) {
|
|
|
|
if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
|
2013-04-24 23:54:43 +08:00
|
|
|
continue;
|
2017-06-21 17:10:10 +08:00
|
|
|
if (Pred.getSUnit() == FirstLocalSU)
|
2013-04-24 23:54:43 +08:00
|
|
|
continue;
|
2017-06-21 17:10:10 +08:00
|
|
|
if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
|
2013-04-24 23:54:43 +08:00
|
|
|
return;
|
2017-06-21 17:10:10 +08:00
|
|
|
GlobalUses.push_back(Pred.getSUnit());
|
2013-04-24 23:54:43 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
|
2013-04-24 23:54:43 +08:00
|
|
|
// Add the weak edges.
|
|
|
|
for (SmallVectorImpl<SUnit*>::const_iterator
|
|
|
|
I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
|
|
|
|
<< GlobalSU->NodeNum << ")\n");
|
2013-04-24 23:54:43 +08:00
|
|
|
DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
|
|
|
|
}
|
|
|
|
for (SmallVectorImpl<SUnit*>::const_iterator
|
|
|
|
I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
|
|
|
|
<< FirstLocalSU->NodeNum << ")\n");
|
2013-04-24 23:54:43 +08:00
|
|
|
DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Callback from DAG postProcessing to create weak edges to encourage
|
2013-04-24 23:54:43 +08:00
|
|
|
/// copy elimination.
|
2016-03-05 23:45:23 +08:00
|
|
|
void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
|
|
|
|
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
|
2013-12-29 05:56:47 +08:00
|
|
|
assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
|
|
|
|
|
2013-04-25 07:19:56 +08:00
|
|
|
MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
|
|
|
|
if (FirstPos == DAG->end())
|
|
|
|
return;
|
2016-02-27 14:40:41 +08:00
|
|
|
RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
|
2013-04-24 23:54:43 +08:00
|
|
|
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
|
2016-02-27 14:40:41 +08:00
|
|
|
*priorNonDebug(DAG->end(), DAG->begin()));
|
2013-04-24 23:54:43 +08:00
|
|
|
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit &SU : DAG->SUnits) {
|
|
|
|
if (!SU.getInstr()->isCopy())
|
2013-04-24 23:54:43 +08:00
|
|
|
continue;
|
|
|
|
|
2017-06-21 17:10:10 +08:00
|
|
|
constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
|
2013-04-24 23:54:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-01-17 14:55:03 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2013-12-07 13:59:44 +08:00
|
|
|
// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
|
|
|
|
// and possibly other custom schedulers.
|
2013-12-29 05:56:57 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2012-01-17 14:55:03 +08:00
|
|
|
|
2013-12-06 01:56:02 +08:00
|
|
|
// Sentinel cycle value (all bits set) marking a reserved-cycle slot that has
// never been written.
static constexpr unsigned InvalidCycle = ~0U;
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
/// Release the hazard recognizer held by this boundary.
SchedBoundary::~SchedBoundary() {
  delete HazardRec;
}
|
2013-04-13 14:07:40 +08:00
|
|
|
|
2017-10-25 16:23:33 +08:00
|
|
|
/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
///
/// The amount by which Count exceeds Latency scaled by LFactor is compared
/// against LFactor as a signed quantity, so a Count below the scaled latency
/// never reports a limit. If we are checking after scheduling a node
/// (AfterSchedNode), merely reaching the limit is enough; otherwise the limit
/// must be strictly exceeded.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency, bool AfterSchedNode) {
  const int Excess = static_cast<int>(Count - (Latency * LFactor));
  const int Limit = static_cast<int>(LFactor);
  return AfterSchedNode ? Excess >= Limit : Excess > Limit;
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::reset() {
|
|
|
|
// A new HazardRec is created for each DAG and owned by SchedBoundary.
|
|
|
|
// Destroying and reconstructing it is very expensive though. So keep
|
|
|
|
// invalid, placeholder HazardRecs.
|
|
|
|
if (HazardRec && HazardRec->isEnabled()) {
|
|
|
|
delete HazardRec;
|
2014-04-14 08:51:57 +08:00
|
|
|
HazardRec = nullptr;
|
2013-12-07 13:59:44 +08:00
|
|
|
}
|
|
|
|
Available.clear();
|
|
|
|
Pending.clear();
|
|
|
|
CheckPending = false;
|
|
|
|
CurrCycle = 0;
|
|
|
|
CurrMOps = 0;
|
2017-02-23 06:32:51 +08:00
|
|
|
MinReadyCycle = std::numeric_limits<unsigned>::max();
|
2013-12-07 13:59:44 +08:00
|
|
|
ExpectedLatency = 0;
|
|
|
|
DependentLatency = 0;
|
|
|
|
RetiredMOps = 0;
|
|
|
|
MaxExecutedResCount = 0;
|
|
|
|
ZoneCritResIdx = 0;
|
|
|
|
IsResourceLimited = false;
|
|
|
|
ReservedCycles.clear();
|
2019-05-11 00:54:32 +08:00
|
|
|
ReservedCyclesIndex.clear();
|
2012-05-11 05:06:19 +08:00
|
|
|
#ifndef NDEBUG
|
2013-12-29 05:56:57 +08:00
|
|
|
// Track the maximum number of stall cycles that could arise either from the
|
|
|
|
// latency of a DAG edge or the number of cycles that a processor resource is
|
|
|
|
// reserved (SchedBoundary::ReservedCycles).
|
2014-06-07 09:48:43 +08:00
|
|
|
MaxObservedStall = 0;
|
2012-05-11 05:06:19 +08:00
|
|
|
#endif
|
2013-12-07 13:59:44 +08:00
|
|
|
// Reserve a zero-count for invalid CritResIdx.
|
|
|
|
ExecutedResCounts.resize(1);
|
|
|
|
assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
|
|
|
|
}
|
2012-01-17 14:55:03 +08:00
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedRemainder::
|
2012-11-07 15:05:09 +08:00
|
|
|
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
|
|
|
|
reset();
|
|
|
|
if (!SchedModel->hasInstrSchedModel())
|
|
|
|
return;
|
|
|
|
RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit &SU : DAG->SUnits) {
|
|
|
|
const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
|
|
|
|
RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
|
2013-06-15 13:39:19 +08:00
|
|
|
* SchedModel->getMicroOpFactor();
|
2012-11-07 15:05:09 +08:00
|
|
|
for (TargetSchedModel::ProcResIter
|
|
|
|
PI = SchedModel->getWriteProcResBegin(SC),
|
|
|
|
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
|
|
|
unsigned PIdx = PI->ProcResourceIdx;
|
|
|
|
unsigned Factor = SchedModel->getResourceFactor(PIdx);
|
|
|
|
RemainingCounts[PIdx] += (Factor * PI->Cycles);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::
|
2012-11-07 15:05:09 +08:00
|
|
|
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
|
|
|
|
reset();
|
|
|
|
DAG = dag;
|
|
|
|
SchedModel = smodel;
|
|
|
|
Rem = rem;
|
2013-12-06 01:56:02 +08:00
|
|
|
if (SchedModel->hasInstrSchedModel()) {
|
2019-05-11 00:54:32 +08:00
|
|
|
unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
|
|
|
|
ReservedCyclesIndex.resize(ResourceCount);
|
|
|
|
ExecutedResCounts.resize(ResourceCount);
|
|
|
|
unsigned NumUnits = 0;
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < ResourceCount; ++i) {
|
|
|
|
ReservedCyclesIndex[i] = NumUnits;
|
|
|
|
NumUnits += SchedModel->getProcResource(i)->NumUnits;
|
|
|
|
}
|
|
|
|
|
|
|
|
ReservedCycles.resize(NumUnits, InvalidCycle);
|
2013-12-06 01:56:02 +08:00
|
|
|
}
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
|
|
|
|
/// these "soft stalls" differently than the hard stall cycles based on CPU
|
|
|
|
/// resources and computed by checkHazard(). A fully in-order model
|
|
|
|
/// (MicroOpBufferSize==0) will not make use of this since instructions are not
|
|
|
|
/// available for scheduling until they are ready. However, a weaker in-order
|
|
|
|
/// model may use this for heuristics. For example, if a processor has in-order
|
|
|
|
/// behavior when reading certain resources, this may come into play.
|
|
|
|
unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
|
|
|
|
if (!SU->isUnbuffered)
|
|
|
|
return 0;
|
2013-09-05 05:00:11 +08:00
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
|
|
|
|
if (ReadyCycle > CurrCycle)
|
|
|
|
return ReadyCycle - CurrCycle;
|
|
|
|
return 0;
|
|
|
|
}
|
2013-09-07 01:32:34 +08:00
|
|
|
|
2019-05-11 00:54:32 +08:00
|
|
|
/// Compute the next cycle at which the given processor resource unit
|
|
|
|
/// can be scheduled.
|
|
|
|
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
|
|
|
|
unsigned Cycles) {
|
|
|
|
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
|
2013-12-07 13:59:44 +08:00
|
|
|
// If this resource has never been used, always return cycle zero.
|
|
|
|
if (NextUnreserved == InvalidCycle)
|
|
|
|
return 0;
|
|
|
|
// For bottom-up scheduling add the cycles needed for the current operation.
|
|
|
|
if (!isTop())
|
|
|
|
NextUnreserved += Cycles;
|
|
|
|
return NextUnreserved;
|
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
|
2019-05-11 00:54:32 +08:00
|
|
|
/// Compute the next cycle at which the given processor resource can be
|
|
|
|
/// scheduled. Returns the next cycle and the index of the processor resource
|
|
|
|
/// instance in the reserved cycles vector.
|
|
|
|
std::pair<unsigned, unsigned>
|
|
|
|
SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
|
|
|
|
unsigned MinNextUnreserved = InvalidCycle;
|
|
|
|
unsigned InstanceIdx = 0;
|
|
|
|
unsigned StartIndex = ReservedCyclesIndex[PIdx];
|
|
|
|
unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
|
|
|
|
assert(NumberOfInstances > 0 &&
|
|
|
|
"Cannot have zero instances of a ProcResource");
|
|
|
|
|
|
|
|
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
|
|
|
|
++I) {
|
|
|
|
unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
|
|
|
|
if (MinNextUnreserved > NextUnreserved) {
|
|
|
|
InstanceIdx = I;
|
|
|
|
MinNextUnreserved = NextUnreserved;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return std::make_pair(MinNextUnreserved, InstanceIdx);
|
|
|
|
}
|
|
|
|
|
2012-06-29 11:23:24 +08:00
|
|
|
/// Does this SU have a hazard within the current instruction group.
|
|
|
|
///
|
|
|
|
/// The scheduler supports two modes of hazard recognition. The first is the
|
|
|
|
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
|
|
|
|
/// supports highly complicated in-order reservation tables
|
2018-06-20 13:29:26 +08:00
|
|
|
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
|
2012-06-29 11:23:24 +08:00
|
|
|
///
|
|
|
|
/// The second is a streamlined mechanism that checks for hazards based on
|
|
|
|
/// simple counters that the scheduler itself maintains. It explicitly checks
|
|
|
|
/// for instruction dispatch limitations, including the number of micro-ops that
|
|
|
|
/// can dispatch per cycle.
|
|
|
|
///
|
|
|
|
/// TODO: Also check whether the SU must start a new group.
|
2013-12-07 13:59:44 +08:00
|
|
|
bool SchedBoundary::checkHazard(SUnit *SU) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (HazardRec->isEnabled()
|
|
|
|
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
|
|
|
|
return true;
|
|
|
|
}
|
2017-03-28 04:46:37 +08:00
|
|
|
|
2012-10-10 13:43:09 +08:00
|
|
|
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
|
2013-06-15 12:49:49 +08:00
|
|
|
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
|
|
|
|
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
|
2012-06-29 11:23:24 +08:00
|
|
|
return true;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2017-03-28 04:46:37 +08:00
|
|
|
|
|
|
|
if (CurrMOps > 0 &&
|
|
|
|
((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
|
|
|
|
(!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
|
|
|
|
<< (isTop() ? "begin" : "end") << " group\n");
|
2017-03-28 04:46:37 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-12-06 01:56:02 +08:00
|
|
|
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
|
|
|
|
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
2017-10-03 17:35:04 +08:00
|
|
|
for (const MCWriteProcResEntry &PE :
|
|
|
|
make_range(SchedModel->getWriteProcResBegin(SC),
|
|
|
|
SchedModel->getWriteProcResEnd(SC))) {
|
|
|
|
unsigned ResIdx = PE.ProcResourceIdx;
|
|
|
|
unsigned Cycles = PE.Cycles;
|
2019-05-11 00:54:32 +08:00
|
|
|
unsigned NRCycle, InstanceIdx;
|
|
|
|
std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
|
2014-06-27 12:57:05 +08:00
|
|
|
if (NRCycle > CurrCycle) {
|
2014-06-27 13:09:36 +08:00
|
|
|
#ifndef NDEBUG
|
2017-10-03 17:35:04 +08:00
|
|
|
MaxObservedStall = std::max(Cycles, MaxObservedStall);
|
2014-06-27 13:09:36 +08:00
|
|
|
#endif
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
|
2019-05-11 00:54:32 +08:00
|
|
|
<< SchedModel->getResourceName(ResIdx)
|
|
|
|
<< '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
|
|
|
|
<< "=" << NRCycle << "c\n");
|
2013-12-06 01:56:02 +08:00
|
|
|
return true;
|
2014-06-27 12:57:05 +08:00
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
}
|
|
|
|
}
|
2012-06-29 11:23:24 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-06-15 13:39:19 +08:00
|
|
|
// Find the unscheduled node in ReadySUs with the highest latency.
|
2013-12-07 13:59:44 +08:00
|
|
|
unsigned SchedBoundary::
|
2013-06-15 13:39:19 +08:00
|
|
|
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
|
2014-04-14 08:51:57 +08:00
|
|
|
SUnit *LateSU = nullptr;
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned RemLatency = 0;
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit *SU : ReadySUs) {
|
|
|
|
unsigned L = getUnscheduledLatency(SU);
|
2013-06-15 12:49:44 +08:00
|
|
|
if (L > RemLatency) {
|
2012-12-19 04:52:56 +08:00
|
|
|
RemLatency = L;
|
2017-06-21 17:10:10 +08:00
|
|
|
LateSU = SU;
|
2013-06-15 12:49:44 +08:00
|
|
|
}
|
2012-12-19 04:52:56 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
if (LateSU) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
|
|
|
|
<< LateSU->NodeNum << ") " << RemLatency << "c\n");
|
2012-12-19 04:52:56 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
return RemLatency;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Count resources in this zone and the remaining unscheduled
|
|
|
|
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
|
|
|
|
// resource index, or zero if the zone is issue limited.
|
2013-12-07 13:59:44 +08:00
|
|
|
unsigned SchedBoundary::
|
2013-06-15 13:39:19 +08:00
|
|
|
getOtherResourceCount(unsigned &OtherCritIdx) {
|
2013-07-19 16:55:18 +08:00
|
|
|
OtherCritIdx = 0;
|
2013-06-15 13:39:19 +08:00
|
|
|
if (!SchedModel->hasInstrSchedModel())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
unsigned OtherCritCount = Rem->RemIssueCount
|
|
|
|
+ (RetiredMOps * SchedModel->getMicroOpFactor());
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
|
|
|
|
<< OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
|
2013-06-15 13:39:19 +08:00
|
|
|
for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
|
|
|
|
PIdx != PEnd; ++PIdx) {
|
|
|
|
unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
|
|
|
|
if (OtherCount > OtherCritCount) {
|
|
|
|
OtherCritCount = OtherCount;
|
|
|
|
OtherCritIdx = PIdx;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (OtherCritIdx) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << " " << Available.getName() << " + Remain CritRes: "
|
|
|
|
<< OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
|
|
|
|
<< " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
}
|
|
|
|
return OtherCritCount;
|
|
|
|
}
|
|
|
|
|
2020-01-08 22:12:15 +08:00
|
|
|
/// Place a released node in the Available queue, or in the Pending queue when
/// it cannot issue yet.
///
/// \param SU the node being released.
/// \param ReadyCycle the cycle at which \p SU becomes ready.
/// \param InPQueue true when \p SU already sits in the Pending queue.
/// \param Idx position of \p SU in the Pending queue; only used when
/// \p InPQueue is true.
void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                                unsigned Idx) {
  assert(SU->getInstr() && "Scheduled SUnit must have instr");

#ifndef NDEBUG
  // ReadyCycle has been bumped up to the CurrCycle when this node was
  // scheduled, but CurrCycle may have been eagerly advanced immediately after
  // scheduling, so may now be greater than ReadyCycle.
  if (ReadyCycle > CurrCycle)
    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
#endif

  MinReadyCycle = std::min(MinReadyCycle, ReadyCycle);

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  const bool InOrderNotReady =
      SchedModel->getMicroOpBufferSize() == 0 && ReadyCycle > CurrCycle;
  const bool MustWait = InOrderNotReady || checkHazard(SU) ||
                        (Available.size() >= ReadyListLimit);

  if (MustWait) {
    // Keep the node pending; enqueue it only if it is not there already.
    if (!InPQueue)
      Pending.push(SU);
    return;
  }

  Available.push(SU);
  if (InPQueue)
    Pending.remove(Pending.begin() + Idx);
}
|
|
|
|
|
|
|
|
/// Move the boundary of scheduled code by one cycle.
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::bumpCycle(unsigned NextCycle) {
|
2013-06-15 13:39:19 +08:00
|
|
|
if (SchedModel->getMicroOpBufferSize() == 0) {
|
2017-02-23 06:32:51 +08:00
|
|
|
assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
|
|
|
|
"MinReadyCycle uninitialized");
|
2013-06-15 13:39:19 +08:00
|
|
|
if (MinReadyCycle > NextCycle)
|
|
|
|
NextCycle = MinReadyCycle;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
// Update the current micro-ops, which will issue in the next cycle.
|
|
|
|
unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
|
|
|
|
CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
|
|
|
|
|
|
|
|
// Decrement DependentLatency based on the next cycle.
|
2013-06-15 12:49:44 +08:00
|
|
|
if ((NextCycle - CurrCycle) > DependentLatency)
|
|
|
|
DependentLatency = 0;
|
|
|
|
else
|
|
|
|
DependentLatency -= (NextCycle - CurrCycle);
|
2012-05-25 06:11:09 +08:00
|
|
|
|
|
|
|
if (!HazardRec->isEnabled()) {
|
2012-06-06 05:11:27 +08:00
|
|
|
// Bypass HazardRec virtual calls.
|
2012-05-25 06:11:09 +08:00
|
|
|
CurrCycle = NextCycle;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2012-06-06 05:11:27 +08:00
|
|
|
// Bypass getHazardType calls in case of long latency.
|
2012-05-25 06:11:09 +08:00
|
|
|
for (; CurrCycle != NextCycle; ++CurrCycle) {
|
|
|
|
if (isTop())
|
|
|
|
HazardRec->AdvanceCycle();
|
|
|
|
else
|
|
|
|
HazardRec->RecedeCycle();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CheckPending = true;
|
2013-06-15 13:39:19 +08:00
|
|
|
IsResourceLimited =
|
2017-10-25 16:23:33 +08:00
|
|
|
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
|
2019-06-07 22:54:47 +08:00
|
|
|
getScheduledLatency(), true);
|
2012-05-25 06:11:09 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
|
|
|
|
<< '\n');
|
2013-06-15 13:39:19 +08:00
|
|
|
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
|
2013-06-15 13:39:19 +08:00
|
|
|
ExecutedResCounts[PIdx] += Count;
|
|
|
|
if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
|
|
|
|
MaxExecutedResCount = ExecutedResCounts[PIdx];
|
2012-05-25 06:11:09 +08:00
|
|
|
}
|
|
|
|
|
2012-11-07 15:05:09 +08:00
|
|
|
/// Add the given processor resource to this scheduled zone.
|
2013-06-15 13:39:19 +08:00
|
|
|
///
|
|
|
|
/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
|
|
|
|
/// during which this resource is consumed.
|
|
|
|
///
|
|
|
|
/// \return the next cycle at which the instruction may execute without
|
|
|
|
/// oversubscribing resources.
|
2013-12-07 13:59:44 +08:00
|
|
|
unsigned SchedBoundary::
|
2013-12-06 01:56:02 +08:00
|
|
|
countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
|
2012-11-07 15:05:09 +08:00
|
|
|
unsigned Factor = SchedModel->getResourceFactor(PIdx);
|
|
|
|
unsigned Count = Factor * Cycles;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
|
|
|
|
<< Cycles << "x" << Factor << "u\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
|
|
|
|
// Update Executed resources counts.
|
|
|
|
incExecutedResources(PIdx, Count);
|
2012-11-07 15:05:09 +08:00
|
|
|
assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
|
|
|
|
Rem->RemainingCounts[PIdx] -= Count;
|
|
|
|
|
2013-07-19 08:20:07 +08:00
|
|
|
// Check if this resource exceeds the current critical resource. If so, it
|
|
|
|
// becomes the critical resource.
|
|
|
|
if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
|
2013-06-15 13:39:19 +08:00
|
|
|
ZoneCritResIdx = PIdx;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " *** Critical resource "
|
|
|
|
<< SchedModel->getResourceName(PIdx) << ": "
|
|
|
|
<< getResourceCount(PIdx) / SchedModel->getLatencyFactor()
|
|
|
|
<< "c\n");
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
// For reserved resources, record the highest cycle using the resource.
|
2019-05-11 00:54:32 +08:00
|
|
|
unsigned NextAvailable, InstanceIdx;
|
|
|
|
std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
|
2013-12-06 01:56:02 +08:00
|
|
|
if (NextAvailable > CurrCycle) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Resource conflict: "
|
2019-05-11 00:54:32 +08:00
|
|
|
<< SchedModel->getResourceName(PIdx)
|
|
|
|
<< '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
|
2018-05-14 20:53:11 +08:00
|
|
|
<< " reserved until @" << NextAvailable << "\n");
|
2013-12-06 01:56:02 +08:00
|
|
|
}
|
|
|
|
return NextAvailable;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
|
|
|
|
2012-06-06 05:11:27 +08:00
|
|
|
/// Move the boundary of scheduled code by one SUnit.
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::bumpNode(SUnit *SU) {
|
2012-06-06 05:11:27 +08:00
|
|
|
// Update the reservation table.
|
|
|
|
if (HazardRec->isEnabled()) {
|
|
|
|
if (!isTop() && SU->isCall) {
|
|
|
|
// Calls are scheduled with their preceding instructions. For bottom-up
|
|
|
|
// scheduling, clear the pipeline state before emitting.
|
|
|
|
HazardRec->Reset();
|
|
|
|
}
|
|
|
|
HazardRec->EmitInstruction(SU);
|
2019-04-19 17:00:55 +08:00
|
|
|
// Scheduling an instruction may have made pending instructions available.
|
|
|
|
CheckPending = true;
|
2012-06-06 05:11:27 +08:00
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
// checkHazard should prevent scheduling multiple instructions per cycle that
|
|
|
|
// exceed the issue width.
|
2013-06-15 13:39:19 +08:00
|
|
|
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
|
|
|
unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
|
2013-12-06 16:58:22 +08:00
|
|
|
assert(
|
|
|
|
(CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
|
2013-12-07 01:19:20 +08:00
|
|
|
"Cannot schedule this instruction's MicroOps in the current cycle.");
|
2013-12-06 01:56:02 +08:00
|
|
|
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
|
2013-12-06 01:56:02 +08:00
|
|
|
unsigned NextCycle = CurrCycle;
|
2013-06-15 13:39:19 +08:00
|
|
|
switch (SchedModel->getMicroOpBufferSize()) {
|
|
|
|
case 0:
|
|
|
|
assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if (ReadyCycle > NextCycle) {
|
|
|
|
NextCycle = ReadyCycle;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// We don't currently model the OOO reorder buffer, so consider all
|
2013-12-06 01:55:58 +08:00
|
|
|
// scheduled MOps to be "retired". We do loosely model in-order resource
|
|
|
|
// latency. If this instruction uses an in-order resource, account for any
|
|
|
|
// likely stall cycles.
|
|
|
|
if (SU->isUnbuffered && ReadyCycle > NextCycle)
|
|
|
|
NextCycle = ReadyCycle;
|
2013-06-15 13:39:19 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
RetiredMOps += IncMOps;
|
|
|
|
|
2012-11-07 15:05:09 +08:00
|
|
|
// Update resource counts and critical resource.
|
|
|
|
if (SchedModel->hasInstrSchedModel()) {
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
|
|
|
|
assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
|
|
|
|
Rem->RemIssueCount -= DecRemIssue;
|
|
|
|
if (ZoneCritResIdx) {
|
|
|
|
// Scale scheduled micro-ops for comparing with the critical resource.
|
|
|
|
unsigned ScaledMOps =
|
|
|
|
RetiredMOps * SchedModel->getMicroOpFactor();
|
|
|
|
|
|
|
|
// If scaled micro-ops are now more than the previous critical resource by
|
|
|
|
// a full cycle, then micro-ops issue becomes critical.
|
|
|
|
if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
|
|
|
|
>= (int)SchedModel->getLatencyFactor()) {
|
|
|
|
ZoneCritResIdx = 0;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
|
|
|
|
<< ScaledMOps / SchedModel->getLatencyFactor()
|
|
|
|
<< "c\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
}
|
|
|
|
}
|
2012-11-07 15:05:09 +08:00
|
|
|
for (TargetSchedModel::ProcResIter
|
|
|
|
PI = SchedModel->getWriteProcResBegin(SC),
|
|
|
|
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned RCycle =
|
2013-12-06 01:56:02 +08:00
|
|
|
countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
|
2013-06-15 13:39:19 +08:00
|
|
|
if (RCycle > NextCycle)
|
|
|
|
NextCycle = RCycle;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
if (SU->hasReservedResource) {
|
|
|
|
// For reserved resources, record the highest cycle using the resource.
|
|
|
|
// For top-down scheduling, this is the cycle in which we schedule this
|
|
|
|
// instruction plus the number of cycles the operations reserves the
|
|
|
|
// resource. For bottom-up is it simply the instruction's cycle.
|
|
|
|
for (TargetSchedModel::ProcResIter
|
|
|
|
PI = SchedModel->getWriteProcResBegin(SC),
|
|
|
|
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
|
|
|
unsigned PIdx = PI->ProcResourceIdx;
|
2013-12-29 05:56:57 +08:00
|
|
|
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
|
2019-05-11 00:54:32 +08:00
|
|
|
unsigned ReservedUntil, InstanceIdx;
|
|
|
|
std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
|
2014-07-03 00:46:08 +08:00
|
|
|
if (isTop()) {
|
2019-05-11 00:54:32 +08:00
|
|
|
ReservedCycles[InstanceIdx] =
|
|
|
|
std::max(ReservedUntil, NextCycle + PI->Cycles);
|
|
|
|
} else
|
|
|
|
ReservedCycles[InstanceIdx] = NextCycle;
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
2013-12-06 01:56:02 +08:00
|
|
|
}
|
|
|
|
}
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
// Update ExpectedLatency and DependentLatency.
|
2013-06-15 12:49:44 +08:00
|
|
|
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
|
|
|
|
unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
|
2013-06-15 13:39:19 +08:00
|
|
|
if (SU->getDepth() > TopLatency) {
|
2013-06-15 12:49:44 +08:00
|
|
|
TopLatency = SU->getDepth();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
|
|
|
|
<< SU->NodeNum << ") " << TopLatency << "c\n");
|
2013-06-15 13:39:19 +08:00
|
|
|
}
|
|
|
|
if (SU->getHeight() > BotLatency) {
|
2013-06-15 12:49:44 +08:00
|
|
|
BotLatency = SU->getHeight();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
|
|
|
|
<< SU->NodeNum << ") " << BotLatency << "c\n");
|
2012-06-06 05:11:27 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
// If we stall for any reason, bump the cycle.
|
2017-10-25 16:23:33 +08:00
|
|
|
if (NextCycle > CurrCycle)
|
2013-06-15 13:39:19 +08:00
|
|
|
bumpCycle(NextCycle);
|
2017-10-25 16:23:33 +08:00
|
|
|
else
|
2013-06-15 13:39:19 +08:00
|
|
|
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
|
2014-01-25 01:20:08 +08:00
|
|
|
// resource limited. If a stall occurred, bumpCycle does this.
|
2013-06-15 13:39:19 +08:00
|
|
|
IsResourceLimited =
|
2017-10-25 16:23:33 +08:00
|
|
|
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
|
2019-06-07 22:54:47 +08:00
|
|
|
getScheduledLatency(), true);
|
2017-10-25 16:23:33 +08:00
|
|
|
|
2013-12-06 01:56:02 +08:00
|
|
|
// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
|
|
|
|
// resets CurrMOps. Loop to handle instructions with more MOps than issue in
|
|
|
|
// one cycle. Since we commonly reach the max MOps here, opportunistically
|
|
|
|
// bump the cycle to avoid uselessly checking everything in the readyQ.
|
|
|
|
CurrMOps += IncMOps;
|
2017-03-28 04:46:37 +08:00
|
|
|
|
|
|
|
// Bump the cycle count for issue group constraints.
|
|
|
|
// This must be done after NextCycle has been adjust for all other stalls.
|
|
|
|
// Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
|
|
|
|
// currCycle to X.
|
|
|
|
if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
|
|
|
|
(!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
|
|
|
|
<< " group\n");
|
2017-03-28 04:46:37 +08:00
|
|
|
bumpCycle(++NextCycle);
|
|
|
|
}
|
|
|
|
|
2013-12-06 01:56:02 +08:00
|
|
|
while (CurrMOps >= SchedModel->getIssueWidth()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
|
|
|
|
<< CurrCycle << '\n');
|
2013-12-29 05:56:57 +08:00
|
|
|
bumpCycle(++NextCycle);
|
2013-12-06 01:56:02 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dumpScheduledState());
|
2012-06-06 05:11:27 +08:00
|
|
|
}
|
|
|
|
|
2012-05-25 06:11:09 +08:00
|
|
|
/// Release pending ready nodes in to the available queue. This makes them
|
|
|
|
/// visible to heuristics.
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::releasePending() {
|
2012-05-25 06:11:09 +08:00
|
|
|
// If the available queue is empty, it is safe to reset MinReadyCycle.
|
|
|
|
if (Available.empty())
|
2017-02-23 06:32:51 +08:00
|
|
|
MinReadyCycle = std::numeric_limits<unsigned>::max();
|
2012-05-25 06:11:09 +08:00
|
|
|
|
|
|
|
// Check to see if any of the pending instructions are ready to issue. If
|
|
|
|
// so, add them to the available queue.
|
2020-01-02 03:53:16 +08:00
|
|
|
for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
|
|
|
|
SUnit *SU = *(Pending.begin() + I);
|
2012-06-06 05:11:27 +08:00
|
|
|
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
|
2012-05-25 06:11:09 +08:00
|
|
|
|
|
|
|
if (ReadyCycle < MinReadyCycle)
|
|
|
|
MinReadyCycle = ReadyCycle;
|
|
|
|
|
2016-04-23 03:09:17 +08:00
|
|
|
if (Available.size() >= ReadyListLimit)
|
|
|
|
break;
|
|
|
|
|
2020-01-08 22:12:15 +08:00
|
|
|
releaseNode(SU, ReadyCycle, true, I);
|
2020-01-02 03:53:16 +08:00
|
|
|
if (E != Pending.size()) {
|
|
|
|
--I;
|
|
|
|
--E;
|
|
|
|
}
|
2012-05-25 06:11:09 +08:00
|
|
|
}
|
|
|
|
CheckPending = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove SU from the ready set for this boundary.
|
2013-12-07 13:59:44 +08:00
|
|
|
void SchedBoundary::removeReady(SUnit *SU) {
|
2012-05-25 06:11:09 +08:00
|
|
|
if (Available.isInQueue(SU))
|
|
|
|
Available.remove(Available.find(SU));
|
|
|
|
else {
|
|
|
|
assert(Pending.isInQueue(SU) && "bad ready count");
|
|
|
|
Pending.remove(Pending.find(SU));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If this queue only has one ready candidate, return it. As a side effect,
|
2012-11-07 15:05:09 +08:00
|
|
|
/// defer any nodes that now hit a hazard, and advance the cycle until at least
|
|
|
|
/// one node is ready. If multiple instructions are ready, return NULL.
|
2013-12-07 13:59:44 +08:00
|
|
|
SUnit *SchedBoundary::pickOnlyChoice() {
|
2012-05-25 06:11:09 +08:00
|
|
|
if (CheckPending)
|
|
|
|
releasePending();
|
|
|
|
|
2020-06-10 00:35:45 +08:00
|
|
|
// Defer any ready instrs that now have a hazard.
|
|
|
|
for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
|
|
|
|
if (checkHazard(*I)) {
|
|
|
|
Pending.push(*I);
|
|
|
|
I = Available.remove(I);
|
|
|
|
continue;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2020-06-10 00:35:45 +08:00
|
|
|
++I;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2012-05-25 06:11:09 +08:00
|
|
|
for (unsigned i = 0; Available.empty(); ++i) {
|
2014-07-03 00:46:08 +08:00
|
|
|
// FIXME: Re-enable assert once PR20057 is resolved.
|
|
|
|
// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
|
|
|
|
// "permanent hazard");
|
|
|
|
(void)i;
|
2013-06-15 13:39:19 +08:00
|
|
|
bumpCycle(CurrCycle + 1);
|
2012-05-25 06:11:09 +08:00
|
|
|
releasePending();
|
|
|
|
}
|
2016-06-24 05:27:38 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(Pending.dump());
|
|
|
|
LLVM_DEBUG(Available.dump());
|
2016-06-24 05:27:38 +08:00
|
|
|
|
2012-05-25 06:11:09 +08:00
|
|
|
if (Available.size() == 1)
|
|
|
|
return *Available.begin();
|
2014-04-14 08:51:57 +08:00
|
|
|
return nullptr;
|
2012-05-25 06:11:09 +08:00
|
|
|
}
|
|
|
|
|
2017-10-15 22:32:27 +08:00
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
2013-06-15 13:39:19 +08:00
|
|
|
// This is useful information to dump after bumpNode.
|
|
|
|
// Note that the Queue contents are more useful before pickNodeFromQueue.
|
2017-06-22 06:19:17 +08:00
|
|
|
LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned ResFactor;
|
|
|
|
unsigned ResCount;
|
|
|
|
if (ZoneCritResIdx) {
|
|
|
|
ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
|
|
|
|
ResCount = getResourceCount(ZoneCritResIdx);
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2013-06-15 13:39:19 +08:00
|
|
|
ResFactor = SchedModel->getMicroOpFactor();
|
2017-09-27 18:31:58 +08:00
|
|
|
ResCount = RetiredMOps * ResFactor;
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-06-15 13:39:19 +08:00
|
|
|
unsigned LFactor = SchedModel->getLatencyFactor();
|
|
|
|
dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
|
|
|
|
<< " Retired: " << RetiredMOps;
|
|
|
|
dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
|
|
|
|
dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
|
2013-12-07 13:59:44 +08:00
|
|
|
<< ResCount / ResFactor << " "
|
|
|
|
<< SchedModel->getResourceName(ZoneCritResIdx)
|
2013-06-15 13:39:19 +08:00
|
|
|
<< "\n ExpectedLatency: " << ExpectedLatency << "c\n"
|
|
|
|
<< (IsResourceLimited ? " - Resource" : " - Latency")
|
|
|
|
<< " limited.\n";
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2013-06-15 13:46:47 +08:00
|
|
|
#endif
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2013-12-29 05:56:57 +08:00
|
|
|
// GenericScheduler - Generic implementation of MachineSchedStrategy.
|
2013-12-07 13:59:44 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2013-12-29 05:56:57 +08:00
|
|
|
void GenericSchedulerBase::SchedCandidate::
|
|
|
|
initResourceDelta(const ScheduleDAGMI *DAG,
|
|
|
|
const TargetSchedModel *SchedModel) {
|
|
|
|
if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
|
|
|
|
return;
|
|
|
|
|
|
|
|
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
|
|
|
|
for (TargetSchedModel::ProcResIter
|
|
|
|
PI = SchedModel->getWriteProcResBegin(SC),
|
|
|
|
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
|
|
|
|
if (PI->ProcResourceIdx == Policy.ReduceResIdx)
|
|
|
|
ResDelta.CritResources += PI->Cycles;
|
|
|
|
if (PI->ProcResourceIdx == Policy.DemandResIdx)
|
|
|
|
ResDelta.DemandedResources += PI->Cycles;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-22 05:48:43 +08:00
|
|
|
/// Compute remaining latency. We need this both to determine whether the
|
|
|
|
/// overall schedule has become latency-limited and whether the instructions
|
|
|
|
/// outside this zone are resource or latency limited.
|
|
|
|
///
|
|
|
|
/// The "dependent" latency is updated incrementally during scheduling as the
|
|
|
|
/// max height/depth of scheduled nodes minus the cycles since it was
|
|
|
|
/// scheduled:
|
|
|
|
/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
|
|
|
|
///
|
|
|
|
/// The "independent" latency is the max ready queue depth:
|
|
|
|
/// ILat = max N.depth for N in Available|Pending
|
|
|
|
///
|
|
|
|
/// RemainingLatency is the greater of independent and dependent latency.
|
|
|
|
///
|
|
|
|
/// These computations are expensive, especially in DAGs with many edges, so
|
|
|
|
/// only do them if necessary.
|
|
|
|
static unsigned computeRemLatency(SchedBoundary &CurrZone) {
|
|
|
|
unsigned RemLatency = CurrZone.getDependentLatency();
|
|
|
|
RemLatency = std::max(RemLatency,
|
|
|
|
CurrZone.findMaxLatency(CurrZone.Available.elements()));
|
|
|
|
RemLatency = std::max(RemLatency,
|
|
|
|
CurrZone.findMaxLatency(CurrZone.Pending.elements()));
|
|
|
|
return RemLatency;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if the current cycle plus remaning latency is greater than
|
2019-01-09 13:11:10 +08:00
|
|
|
/// the critical path in the scheduling region.
|
2018-08-22 05:48:43 +08:00
|
|
|
bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
|
|
|
|
SchedBoundary &CurrZone,
|
|
|
|
bool ComputeRemLatency,
|
|
|
|
unsigned &RemLatency) const {
|
|
|
|
// The current cycle is already greater than the critical path, so we are
|
2019-01-09 13:11:10 +08:00
|
|
|
// already latency limited and don't need to compute the remaining latency.
|
2018-08-22 05:48:43 +08:00
|
|
|
if (CurrZone.getCurrCycle() > Rem.CriticalPath)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// If we haven't scheduled anything yet, then we aren't latency limited.
|
|
|
|
if (CurrZone.getCurrCycle() == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (ComputeRemLatency)
|
|
|
|
RemLatency = computeRemLatency(CurrZone);
|
|
|
|
|
|
|
|
return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
|
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:57 +08:00
|
|
|
/// Set the CandPolicy given a scheduling zone given the current resources and
|
|
|
|
/// latencies inside and outside the zone.
|
2016-04-21 09:54:13 +08:00
|
|
|
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
|
2013-12-29 05:56:57 +08:00
|
|
|
SchedBoundary &CurrZone,
|
|
|
|
SchedBoundary *OtherZone) {
|
2015-06-19 09:53:21 +08:00
|
|
|
// Apply preemptive heuristics based on the total latency and resources
|
2013-12-29 05:56:57 +08:00
|
|
|
// inside and outside this zone. Potential stalls should be considered before
|
|
|
|
// following this policy.
|
|
|
|
|
|
|
|
// Compute the critical resource outside the zone.
|
2013-12-29 06:25:57 +08:00
|
|
|
unsigned OtherCritIdx = 0;
|
2013-12-29 05:56:57 +08:00
|
|
|
unsigned OtherCount =
|
|
|
|
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
|
|
|
|
|
|
|
|
bool OtherResLimited = false;
|
2018-08-22 05:48:43 +08:00
|
|
|
unsigned RemLatency = 0;
|
|
|
|
bool RemLatencyComputed = false;
|
|
|
|
if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
|
|
|
|
RemLatency = computeRemLatency(CurrZone);
|
|
|
|
RemLatencyComputed = true;
|
2017-10-25 16:23:33 +08:00
|
|
|
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
|
2019-06-07 22:54:47 +08:00
|
|
|
OtherCount, RemLatency, false);
|
2018-08-22 05:48:43 +08:00
|
|
|
}
|
2017-10-25 16:23:33 +08:00
|
|
|
|
2013-12-29 05:56:57 +08:00
|
|
|
// Schedule aggressively for latency in PostRA mode. We don't check for
|
|
|
|
// acyclic latency during PostRA, and highly out-of-order processors will
|
|
|
|
// skip PostRA scheduling.
|
2018-08-22 05:48:43 +08:00
|
|
|
if (!OtherResLimited &&
|
|
|
|
(IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
|
|
|
|
RemLatency))) {
|
|
|
|
Policy.ReduceLatency |= true;
|
|
|
|
LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
|
|
|
|
<< " RemainingLatency " << RemLatency << " + "
|
|
|
|
<< CurrZone.getCurrCycle() << "c > CritPath "
|
|
|
|
<< Rem.CriticalPath << "\n");
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
|
|
|
// If the same resource is limiting inside and outside the zone, do nothing.
|
|
|
|
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
|
|
|
|
return;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
|
|
|
|
dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
|
|
|
|
<< SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
|
|
|
|
} if (OtherResLimited) dbgs()
|
|
|
|
<< " RemainingLimit: "
|
|
|
|
<< SchedModel->getResourceName(OtherCritIdx) << "\n";
|
|
|
|
if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
|
|
|
|
<< " Latency limited both directions.\n");
|
2013-12-29 05:56:57 +08:00
|
|
|
|
|
|
|
if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
|
|
|
|
Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
|
|
|
|
|
|
|
|
if (OtherResLimited)
|
|
|
|
Policy.DemandResIdx = OtherCritIdx;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
/// Map a candidate-selection reason to the label printed in scheduler debug
/// traces. Every enumerator handled below returns a string literal; any other
/// value reaches llvm_unreachable.
const char *GenericSchedulerBase::getReasonStr(
    GenericSchedulerBase::CandReason Reason) {
  switch (Reason) {
  case NoCand:
    return "NOCAND ";
  case Only1:
    return "ONLY1 ";
  case PhysReg:
    return "PHYS-REG ";
  case RegExcess:
    return "REG-EXCESS";
  case RegCritical:
    return "REG-CRIT ";
  case Stall:
    return "STALL ";
  case Cluster:
    return "CLUSTER ";
  case Weak:
    return "WEAK ";
  case RegMax:
    return "REG-MAX ";
  case ResourceReduce:
    return "RES-REDUCE";
  case ResourceDemand:
    return "RES-DEMAND";
  case TopDepthReduce:
    return "TOP-DEPTH ";
  case TopPathReduce:
    return "TOP-PATH ";
  case BotHeightReduce:
    return "BOT-HEIGHT";
  case BotPathReduce:
    return "BOT-PATH ";
  case NextDefUse:
    return "DEF-USE ";
  case NodeOrder:
    return "ORDER ";
  }
  llvm_unreachable("Unknown reason!");
}
|
|
|
|
|
|
|
|
void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
|
|
|
|
PressureChange P;
|
|
|
|
unsigned ResIdx = 0;
|
|
|
|
unsigned Latency = 0;
|
|
|
|
switch (Cand.Reason) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case RegExcess:
|
|
|
|
P = Cand.RPDelta.Excess;
|
|
|
|
break;
|
|
|
|
case RegCritical:
|
|
|
|
P = Cand.RPDelta.CriticalMax;
|
|
|
|
break;
|
|
|
|
case RegMax:
|
|
|
|
P = Cand.RPDelta.CurrentMax;
|
|
|
|
break;
|
|
|
|
case ResourceReduce:
|
|
|
|
ResIdx = Cand.Policy.ReduceResIdx;
|
|
|
|
break;
|
|
|
|
case ResourceDemand:
|
|
|
|
ResIdx = Cand.Policy.DemandResIdx;
|
|
|
|
break;
|
|
|
|
case TopDepthReduce:
|
|
|
|
Latency = Cand.SU->getDepth();
|
|
|
|
break;
|
|
|
|
case TopPathReduce:
|
|
|
|
Latency = Cand.SU->getHeight();
|
|
|
|
break;
|
|
|
|
case BotHeightReduce:
|
|
|
|
Latency = Cand.SU->getHeight();
|
|
|
|
break;
|
|
|
|
case BotPathReduce:
|
|
|
|
Latency = Cand.SU->getDepth();
|
|
|
|
break;
|
|
|
|
}
|
2015-09-19 02:52:20 +08:00
|
|
|
dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
|
2013-12-29 05:56:57 +08:00
|
|
|
if (P.isValid())
|
|
|
|
dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
|
|
|
|
<< ":" << P.getUnitInc() << " ";
|
|
|
|
else
|
|
|
|
dbgs() << " ";
|
|
|
|
if (ResIdx)
|
|
|
|
dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
|
|
|
|
else
|
|
|
|
dbgs() << " ";
|
|
|
|
if (Latency)
|
|
|
|
dbgs() << " " << Latency << " cycles ";
|
|
|
|
else
|
|
|
|
dbgs() << " ";
|
|
|
|
dbgs() << '\n';
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-04-12 15:21:39 +08:00
|
|
|
namespace llvm {
|
2013-12-29 05:56:57 +08:00
|
|
|
/// Return true if this heuristic determines order.
|
2018-04-12 15:21:39 +08:00
|
|
|
bool tryLess(int TryVal, int CandVal,
|
|
|
|
GenericSchedulerBase::SchedCandidate &TryCand,
|
|
|
|
GenericSchedulerBase::SchedCandidate &Cand,
|
|
|
|
GenericSchedulerBase::CandReason Reason) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (TryVal < CandVal) {
|
|
|
|
TryCand.Reason = Reason;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (TryVal > CandVal) {
|
|
|
|
if (Cand.Reason > Reason)
|
|
|
|
Cand.Reason = Reason;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-04-12 15:21:39 +08:00
|
|
|
bool tryGreater(int TryVal, int CandVal,
|
|
|
|
GenericSchedulerBase::SchedCandidate &TryCand,
|
|
|
|
GenericSchedulerBase::SchedCandidate &Cand,
|
|
|
|
GenericSchedulerBase::CandReason Reason) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (TryVal > CandVal) {
|
|
|
|
TryCand.Reason = Reason;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (TryVal < CandVal) {
|
|
|
|
if (Cand.Reason > Reason)
|
|
|
|
Cand.Reason = Reason;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-04-12 15:21:39 +08:00
|
|
|
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
|
|
|
|
GenericSchedulerBase::SchedCandidate &Cand,
|
|
|
|
SchedBoundary &Zone) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (Zone.isTop()) {
|
2020-01-07 23:43:46 +08:00
|
|
|
// Prefer the candidate with the lesser depth, but only if one of them has
|
|
|
|
// depth greater than the total latency scheduled so far, otherwise either
|
|
|
|
// of them could be scheduled now with no stall.
|
|
|
|
if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
|
|
|
|
Zone.getScheduledLatency()) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
|
|
|
|
TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
|
|
|
|
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
|
|
|
|
return true;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2020-01-07 23:43:46 +08:00
|
|
|
// Prefer the candidate with the lesser height, but only if one of them has
|
|
|
|
// height greater than the total latency scheduled so far, otherwise either
|
|
|
|
// of them could be scheduled now with no stall.
|
|
|
|
if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
|
|
|
|
Zone.getScheduledLatency()) {
|
2013-12-29 05:56:57 +08:00
|
|
|
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
|
|
|
|
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
|
|
|
|
TryCand, Cand, GenericSchedulerBase::BotPathReduce))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2018-04-12 15:21:39 +08:00
|
|
|
} // end namespace llvm
|
2013-12-29 05:56:57 +08:00
|
|
|
|
2016-05-28 06:14:26 +08:00
|
|
|
/// Debug-trace which zone a node was picked from and the reason label.
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
  LLVM_DEBUG({
    const char *ZoneTag = IsTop ? "Top " : "Bot ";
    dbgs() << "Pick " << ZoneTag
           << GenericSchedulerBase::getReasonStr(Reason) << '\n';
  });
}
|
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
/// Debug-trace the pick described by \p Cand, using its Reason and AtTop.
static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
  const bool PickedFromTop = Cand.AtTop;
  tracePick(Cand.Reason, PickedFromTop);
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
/// Prepare the scheduler for a new DAG: cache the DAG, scheduling model and
/// TRI, optionally compute the DFS result, reset the remaining-resource and
/// zone state, make sure both zones have a hazard recognizer, and clear the
/// cached top/bottom candidates.
///
/// \param dag must have vreg liveness; it is downcast to ScheduleDAGMILive.
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
  assert(dag->hasVRegLiveness() &&
         "(PreRA)GenericScheduler needs vreg liveness");
  DAG = static_cast<ScheduleDAGMILive *>(dag);
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;

  if (RegionPolicy.ComputeDFSResult)
    DAG->computeDFSResult();

  // Initialize resource counts and both scheduling zones.
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled. A recognizer that
  // already exists is kept.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  for (auto *Zone : {&Top, &Bot}) {
    if (Zone->HazardRec)
      continue;
    Zone->HazardRec =
        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
            Itin, DAG);
  }

  TopCand.SU = nullptr;
  BotCand.SU = nullptr;
}
|
|
|
|
|
|
|
|
/// Initialize the per-region scheduling policy.
|
|
|
|
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
|
|
|
|
MachineBasicBlock::iterator End,
|
|
|
|
unsigned NumRegionInstrs) {
|
2017-10-11 07:50:49 +08:00
|
|
|
const MachineFunction &MF = *Begin->getMF();
|
2014-10-14 14:56:25 +08:00
|
|
|
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
|
2013-12-07 13:59:44 +08:00
|
|
|
|
|
|
|
// Avoid setting up the register pressure tracker for small regions to save
|
|
|
|
// compile time. As a rough heuristic, only track pressure when the number of
|
|
|
|
// schedulable instructions exceeds half the integer register file.
|
2014-01-22 05:27:37 +08:00
|
|
|
RegionPolicy.ShouldTrackPressure = true;
|
2014-01-22 11:38:55 +08:00
|
|
|
for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
|
|
|
|
MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
|
|
|
|
if (TLI->isTypeLegal(LegalIntVT)) {
|
2014-01-22 05:27:37 +08:00
|
|
|
unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
|
2014-01-22 11:38:55 +08:00
|
|
|
TLI->getRegClassFor(LegalIntVT));
|
2014-01-22 05:27:37 +08:00
|
|
|
RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
|
|
|
|
}
|
|
|
|
}
|
2013-12-07 13:59:44 +08:00
|
|
|
|
|
|
|
// For generic targets, we default to bottom-up, because it's simpler and more
|
|
|
|
// compile-time optimizations have been implemented in that direction.
|
|
|
|
RegionPolicy.OnlyBottomUp = true;
|
|
|
|
|
|
|
|
// Allow the subtarget to override default policy.
|
2016-07-01 08:23:27 +08:00
|
|
|
MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
|
2013-12-07 13:59:44 +08:00
|
|
|
|
|
|
|
// After subtarget overrides, apply command line options.
|
2019-05-31 07:31:36 +08:00
|
|
|
if (!EnableRegPressure) {
|
2013-12-07 13:59:44 +08:00
|
|
|
RegionPolicy.ShouldTrackPressure = false;
|
2019-05-31 07:31:36 +08:00
|
|
|
RegionPolicy.ShouldTrackLaneMasks = false;
|
|
|
|
}
|
2013-12-07 13:59:44 +08:00
|
|
|
|
|
|
|
// Check -misched-topdown/bottomup can force or unforce scheduling direction.
|
|
|
|
// e.g. -misched-bottomup=false allows scheduling in both directions.
|
|
|
|
assert((!ForceTopDown || !ForceBottomUp) &&
|
|
|
|
"-misched-topdown incompatible with -misched-bottomup");
|
|
|
|
if (ForceBottomUp.getNumOccurrences() > 0) {
|
|
|
|
RegionPolicy.OnlyBottomUp = ForceBottomUp;
|
|
|
|
if (RegionPolicy.OnlyBottomUp)
|
|
|
|
RegionPolicy.OnlyTopDown = false;
|
|
|
|
}
|
|
|
|
if (ForceTopDown.getNumOccurrences() > 0) {
|
|
|
|
RegionPolicy.OnlyTopDown = ForceTopDown;
|
|
|
|
if (RegionPolicy.OnlyTopDown)
|
|
|
|
RegionPolicy.OnlyBottomUp = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-22 06:19:17 +08:00
|
|
|
void GenericScheduler::dumpPolicy() const {
|
2017-01-28 10:02:38 +08:00
|
|
|
// Cannot completely remove virtual function even in release mode.
|
2017-10-15 22:32:27 +08:00
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
2015-09-19 02:52:20 +08:00
|
|
|
dbgs() << "GenericScheduler RegionPolicy: "
|
|
|
|
<< " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
|
|
|
|
<< " OnlyTopDown=" << RegionPolicy.OnlyTopDown
|
|
|
|
<< " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
|
|
|
|
<< "\n";
|
2017-01-28 10:02:38 +08:00
|
|
|
#endif
|
2015-09-19 02:52:20 +08:00
|
|
|
}
|
|
|
|
|
2013-12-07 13:59:44 +08:00
|
|
|
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
|
|
|
|
/// critical path by more cycles than it takes to drain the instruction buffer.
|
|
|
|
/// We estimate an upper bounds on in-flight instructions as:
|
|
|
|
///
|
|
|
|
/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
|
|
|
|
/// InFlightIterations = AcyclicPath / CyclesPerIteration
|
|
|
|
/// InFlightResources = InFlightIterations * LoopResources
|
|
|
|
///
|
|
|
|
/// TODO: Check execution resources in addition to IssueCount.
|
|
|
|
void GenericScheduler::checkAcyclicLatency() {
|
|
|
|
if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Scaled number of cycles per loop iteration.
|
|
|
|
unsigned IterCount =
|
|
|
|
std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
|
|
|
|
Rem.RemIssueCount);
|
|
|
|
// Scaled acyclic critical path.
|
|
|
|
unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
|
|
|
|
// InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
|
|
|
|
unsigned InFlightCount =
|
|
|
|
(AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
|
|
|
|
unsigned BufferLimit =
|
|
|
|
SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
|
|
|
|
|
|
|
|
Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "IssueCycles="
|
|
|
|
<< Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
|
|
|
|
<< "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
|
|
|
|
<< "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
|
|
|
|
<< " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
|
|
|
|
<< "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
|
|
|
|
if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
|
2013-12-07 13:59:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void GenericScheduler::registerRoots() {
|
|
|
|
Rem.CriticalPath = DAG->ExitSU.getDepth();
|
|
|
|
|
|
|
|
// Some roots may not feed into ExitSU. Check all of them in case.
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const SUnit *SU : Bot.Available) {
|
|
|
|
if (SU->getDepth() > Rem.CriticalPath)
|
|
|
|
Rem.CriticalPath = SU->getDepth();
|
2013-12-07 13:59:44 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
|
2014-08-08 05:49:44 +08:00
|
|
|
if (DumpCriticalPathLength) {
|
|
|
|
errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
|
|
|
|
}
|
2013-12-07 13:59:44 +08:00
|
|
|
|
2017-04-13 02:09:05 +08:00
|
|
|
if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
|
2013-12-07 13:59:44 +08:00
|
|
|
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
|
|
|
|
checkAcyclicLatency();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-12 15:21:39 +08:00
|
|
|
namespace llvm {
|
|
|
|
bool tryPressure(const PressureChange &TryP,
|
|
|
|
const PressureChange &CandP,
|
|
|
|
GenericSchedulerBase::SchedCandidate &TryCand,
|
|
|
|
GenericSchedulerBase::SchedCandidate &Cand,
|
|
|
|
GenericSchedulerBase::CandReason Reason,
|
|
|
|
const TargetRegisterInfo *TRI,
|
|
|
|
const MachineFunction &MF) {
|
2013-08-30 12:27:29 +08:00
|
|
|
// If one candidate decreases and the other increases, go with it.
|
|
|
|
// Invalid candidates have UnitInc==0.
|
2014-10-11 01:06:20 +08:00
|
|
|
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
|
|
|
|
Reason)) {
|
2013-08-30 12:27:29 +08:00
|
|
|
return true;
|
2013-07-25 15:26:35 +08:00
|
|
|
}
|
2016-06-25 08:23:00 +08:00
|
|
|
// Do not compare the magnitude of pressure changes between top and bottom
|
|
|
|
// boundary.
|
|
|
|
if (Cand.AtTop != TryCand.AtTop)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// If both candidates affect the same set in the same boundary, go with the
|
|
|
|
// smallest increase.
|
|
|
|
unsigned TryPSet = TryP.getPSetOrMax();
|
|
|
|
unsigned CandPSet = CandP.getPSetOrMax();
|
|
|
|
if (TryPSet == CandPSet) {
|
|
|
|
return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
|
|
|
|
Reason);
|
|
|
|
}
|
2015-12-17 02:31:01 +08:00
|
|
|
|
|
|
|
int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
|
|
|
|
std::numeric_limits<int>::max();
|
|
|
|
|
|
|
|
int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
|
|
|
|
std::numeric_limits<int>::max();
|
|
|
|
|
2013-07-25 15:26:35 +08:00
|
|
|
// If the candidates are decreasing pressure, reverse priority.
|
2013-08-30 11:49:48 +08:00
|
|
|
if (TryP.getUnitInc() < 0)
|
2013-07-25 15:26:35 +08:00
|
|
|
std::swap(TryRank, CandRank);
|
|
|
|
return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
|
|
|
|
}
|
|
|
|
|
2018-04-12 15:21:39 +08:00
|
|
|
unsigned getWeakLeft(const SUnit *SU, bool isTop) {
|
2012-11-13 03:40:10 +08:00
|
|
|
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
|
|
|
|
}
|
|
|
|
|
2013-04-13 14:07:40 +08:00
|
|
|
/// Minimize physical register live ranges. Regalloc wants them adjacent to
|
|
|
|
/// their physreg def/use.
|
|
|
|
///
|
|
|
|
/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
|
|
|
|
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
|
|
|
|
/// with the operation that produces or consumes the physreg. We'll do this when
|
|
|
|
/// regalloc has support for parallel copies.
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
int biasPhysReg(const SUnit *SU, bool isTop) {
|
2013-04-13 14:07:40 +08:00
|
|
|
const MachineInstr *MI = SU->getInstr();
|
|
|
|
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
if (MI->isCopy()) {
|
|
|
|
unsigned ScheduledOper = isTop ? 1 : 0;
|
|
|
|
unsigned UnscheduledOper = isTop ? 0 : 1;
|
|
|
|
// If we have already scheduled the physreg produce/consumer, immediately
|
|
|
|
// schedule the copy.
|
2019-08-02 07:27:28 +08:00
|
|
|
if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
return 1;
|
|
|
|
// If the physreg is at the boundary, defer it. Otherwise schedule it
|
|
|
|
// immediately to free the dependent. We can hoist the copy later.
|
|
|
|
bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
|
2019-08-02 07:27:28 +08:00
|
|
|
if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
return AtBoundary ? -1 : 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (MI->isMoveImmediate()) {
|
|
|
|
// If we have a move immediate and all successors have been assigned, bias
|
|
|
|
// towards scheduling this later. Make sure all register defs are to
|
|
|
|
// physical registers.
|
|
|
|
bool DoBias = true;
|
|
|
|
for (const MachineOperand &Op : MI->defs()) {
|
2019-08-02 07:27:28 +08:00
|
|
|
if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
DoBias = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DoBias)
|
|
|
|
return isTop ? -1 : 1;
|
|
|
|
}
|
|
|
|
|
2013-04-13 14:07:40 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2018-04-12 15:21:39 +08:00
|
|
|
} // end namespace llvm
|
2013-04-13 14:07:40 +08:00
|
|
|
|
2016-04-23 03:10:15 +08:00
|
|
|
/// Fill in \p Cand for \p SU on the given boundary and, when the DAG tracks
/// register pressure, compute its pressure delta into Cand.RPDelta.
///
/// \param Cand candidate to initialize (SU, AtTop and RPDelta are written).
/// \param SU node the candidate stands for.
/// \param AtTop true when the node is considered for the top boundary.
/// \param RPTracker tracker queried directly on the bottom-up path when
///        VerifyScheduling is off.
/// \param TempTracker tracker used by the getMax*PressureDelta queries.
void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     RegPressureTracker &TempTracker) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (DAG->isTrackingPressure()) {
    MachineInstr *MI = SU->getInstr();
    const auto &CriticalPSets = DAG->getRegionCriticalPSets();
    const auto &MaxSetPressure = DAG->getRegPressure().MaxSetPressure;

    if (AtTop) {
      TempTracker.getMaxDownwardPressureDelta(MI, Cand.RPDelta, CriticalPSets,
                                              MaxSetPressure);
    } else if (VerifyScheduling) {
      TempTracker.getMaxUpwardPressureDelta(MI, &DAG->getPressureDiff(SU),
                                            Cand.RPDelta, CriticalPSets,
                                            MaxSetPressure);
    } else {
      RPTracker.getUpwardPressureDelta(MI, DAG->getPressureDiff(SU),
                                       Cand.RPDelta, CriticalPSets,
                                       MaxSetPressure);
    }
  }

  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
             << " Try SU(" << Cand.SU->NodeNum << ") "
             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
             << Cand.RPDelta.Excess.getUnitInc() << "\n");
}
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2018-06-20 13:29:26 +08:00
|
|
|
/// Apply a set of heuristics to a new candidate. Heuristics are currently
|
2016-04-23 03:10:15 +08:00
|
|
|
/// hierarchical. This may be more efficient than a graduated cost model because
|
|
|
|
/// we don't need to evaluate all aspects of the model for each node in the
|
|
|
|
/// queue. But it's really done to make the heuristics easier to debug and
|
|
|
|
/// statistically analyze.
|
|
|
|
///
|
|
|
|
/// \param Cand provides the policy and current best candidate.
|
|
|
|
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
|
2016-06-25 08:23:00 +08:00
|
|
|
/// \param Zone describes the scheduled zone that we are extending, or nullptr
|
|
|
|
// if Cand is from a different zone than TryCand.
|
2016-04-23 03:10:15 +08:00
|
|
|
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
|
|
|
|
SchedCandidate &TryCand,
|
2018-04-12 15:21:39 +08:00
|
|
|
SchedBoundary *Zone) const {
|
2012-11-07 15:05:09 +08:00
|
|
|
// Initialize the candidate if needed.
|
|
|
|
if (!Cand.isValid()) {
|
|
|
|
TryCand.Reason = NodeOrder;
|
|
|
|
return;
|
|
|
|
}
|
2013-04-13 14:07:40 +08:00
|
|
|
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
// Bias PhysReg Defs and copies to their uses and defined respectively.
|
|
|
|
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
|
|
|
|
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
|
2013-04-13 14:07:40 +08:00
|
|
|
return;
|
|
|
|
|
2015-05-18 07:40:27 +08:00
|
|
|
// Avoid exceeding the target's limit.
|
2013-09-05 05:00:11 +08:00
|
|
|
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
|
|
|
|
Cand.RPDelta.Excess,
|
2015-12-17 02:31:01 +08:00
|
|
|
TryCand, Cand, RegExcess, TRI,
|
|
|
|
DAG->MF))
|
2012-11-07 15:05:09 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
// Avoid increasing the max critical pressure in the scheduled region.
|
2013-09-05 05:00:11 +08:00
|
|
|
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
|
|
|
|
Cand.RPDelta.CriticalMax,
|
2015-12-17 02:31:01 +08:00
|
|
|
TryCand, Cand, RegCritical, TRI,
|
|
|
|
DAG->MF))
|
2012-11-07 15:05:09 +08:00
|
|
|
return;
|
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
// We only compare a subset of features when comparing nodes between
|
|
|
|
// Top and Bottom boundary. Some properties are simply incomparable, in many
|
|
|
|
// other instances we should only override the other boundary if something
|
|
|
|
// is a clear good pick on one boundary. Skip heuristics that are more
|
|
|
|
// "tie-breaking" in nature.
|
|
|
|
bool SameBoundary = Zone != nullptr;
|
|
|
|
if (SameBoundary) {
|
|
|
|
// For loops that are acyclic path limited, aggressively schedule for
|
2016-11-04 16:31:14 +08:00
|
|
|
// latency. Within an single cycle, whenever CurrMOps > 0, allow normal
|
|
|
|
// heuristics to take precedence.
|
2016-06-25 08:23:00 +08:00
|
|
|
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
|
|
|
|
tryLatency(TryCand, Cand, *Zone))
|
|
|
|
return;
|
2013-09-07 01:32:36 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
// Prioritize instructions that read unbuffered resources by stall cycles.
|
|
|
|
if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
|
|
|
|
Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
|
|
|
|
return;
|
|
|
|
}
|
2013-12-06 01:55:58 +08:00
|
|
|
|
2012-11-13 03:40:10 +08:00
|
|
|
// Keep clustered nodes together to encourage downstream peephole
|
|
|
|
// optimizations which may reduce resource requirements.
|
|
|
|
//
|
|
|
|
// This is a best effort to set things up for a post-RA pass. Optimizations
|
|
|
|
// like generating loads of multiple registers should ideally be done within
|
|
|
|
// the scheduler pass by combining the loads during DAG postprocessing.
|
2016-06-25 08:23:00 +08:00
|
|
|
const SUnit *CandNextClusterSU =
|
|
|
|
Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
|
|
|
|
const SUnit *TryCandNextClusterSU =
|
|
|
|
TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
|
|
|
|
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
|
|
|
|
Cand.SU == CandNextClusterSU,
|
2012-11-13 03:40:10 +08:00
|
|
|
TryCand, Cand, Cluster))
|
|
|
|
return;
|
2013-04-24 23:54:43 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
if (SameBoundary) {
|
|
|
|
// Weak edges are for clustering and other constraints.
|
|
|
|
if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
|
|
|
|
getWeakLeft(Cand.SU, Cand.AtTop),
|
|
|
|
TryCand, Cand, Weak))
|
|
|
|
return;
|
2012-11-13 03:40:10 +08:00
|
|
|
}
|
2016-06-25 08:23:00 +08:00
|
|
|
|
2013-06-18 05:45:13 +08:00
|
|
|
// Avoid increasing the max pressure of the entire region.
|
2013-09-05 05:00:11 +08:00
|
|
|
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
|
|
|
|
Cand.RPDelta.CurrentMax,
|
2015-12-17 02:31:01 +08:00
|
|
|
TryCand, Cand, RegMax, TRI,
|
|
|
|
DAG->MF))
|
2013-06-18 05:45:13 +08:00
|
|
|
return;
|
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
if (SameBoundary) {
|
|
|
|
// Avoid critical resource consumption and balance the schedule.
|
|
|
|
TryCand.initResourceDelta(DAG, SchedModel);
|
|
|
|
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
|
|
|
|
TryCand, Cand, ResourceReduce))
|
|
|
|
return;
|
|
|
|
if (tryGreater(TryCand.ResDelta.DemandedResources,
|
|
|
|
Cand.ResDelta.DemandedResources,
|
|
|
|
TryCand, Cand, ResourceDemand))
|
|
|
|
return;
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
// Avoid serializing long latency dependence chains.
|
|
|
|
// For acyclic path limited loops, latency was already checked above.
|
|
|
|
if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
|
|
|
|
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
|
|
|
|
return;
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
// Fall through to original instruction order.
|
|
|
|
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|
|
|
|
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
|
|
|
|
TryCand.Reason = NodeOrder;
|
|
|
|
}
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
2012-05-11 05:06:19 +08:00
|
|
|
}
|
|
|
|
|
2013-09-07 01:32:44 +08:00
|
|
|
/// Pick the best candidate from the queue.
|
2012-05-11 05:06:16 +08:00
|
|
|
///
|
|
|
|
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
|
|
|
|
/// DAG building. To adjust for the current scheduling location we need to
|
|
|
|
/// maintain the number of vreg uses remaining to be top-scheduled.
|
2013-09-20 07:10:59 +08:00
|
|
|
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
|
2016-06-25 08:23:00 +08:00
|
|
|
const CandPolicy &ZonePolicy,
|
2013-12-06 01:55:47 +08:00
|
|
|
const RegPressureTracker &RPTracker,
|
|
|
|
SchedCandidate &Cand) {
|
2012-05-11 05:06:16 +08:00
|
|
|
// getMaxPressureDelta temporarily modifies the tracker.
|
|
|
|
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
|
|
|
|
|
2016-06-24 05:27:38 +08:00
|
|
|
ReadyQueue &Q = Zone.Available;
|
2017-06-21 17:10:10 +08:00
|
|
|
for (SUnit *SU : Q) {
|
2012-05-11 05:06:16 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
SchedCandidate TryCand(ZonePolicy);
|
2017-06-21 17:10:10 +08:00
|
|
|
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
|
2016-06-25 08:23:00 +08:00
|
|
|
// Pass SchedBoundary only when comparing nodes from the same boundary.
|
|
|
|
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
|
|
|
|
tryCandidate(Cand, TryCand, ZoneArg);
|
2012-11-07 15:05:09 +08:00
|
|
|
if (TryCand.Reason != NoCand) {
|
|
|
|
// Initialize resource delta if needed in case future heuristics query it.
|
|
|
|
if (TryCand.ResDelta == SchedResourceDelta())
|
|
|
|
TryCand.initResourceDelta(DAG, SchedModel);
|
|
|
|
Cand.setBest(TryCand);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(traceCandidate(Cand));
|
2012-05-11 05:06:16 +08:00
|
|
|
}
|
|
|
|
}
|
2012-11-07 15:05:09 +08:00
|
|
|
}
|
|
|
|
|
2012-05-18 02:35:10 +08:00
|
|
|
/// Pick the best candidate node from either the top or bottom queue.
|
2013-09-20 07:10:59 +08:00
|
|
|
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
|
2012-05-18 02:35:10 +08:00
|
|
|
// Schedule as far as possible in the direction of no choice. This is most
|
|
|
|
// efficient, but also provides the best heuristics for CriticalPSets.
|
2012-05-25 06:11:09 +08:00
|
|
|
if (SUnit *SU = Bot.pickOnlyChoice()) {
|
2012-05-18 02:35:10 +08:00
|
|
|
IsTopNode = false;
|
2016-05-28 06:14:26 +08:00
|
|
|
tracePick(Only1, false);
|
2012-05-25 06:11:09 +08:00
|
|
|
return SU;
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
2012-05-25 06:11:09 +08:00
|
|
|
if (SUnit *SU = Top.pickOnlyChoice()) {
|
2012-05-18 02:35:10 +08:00
|
|
|
IsTopNode = true;
|
2016-05-28 06:14:26 +08:00
|
|
|
tracePick(Only1, true);
|
2012-05-25 06:11:09 +08:00
|
|
|
return SU;
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
2013-12-07 13:59:44 +08:00
|
|
|
// Set the bottom-up policy based on the state of the current bottom zone and
|
|
|
|
// the instructions outside the zone, including the top zone.
|
2016-06-25 08:23:00 +08:00
|
|
|
CandPolicy BotPolicy;
|
|
|
|
setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
|
2013-12-07 13:59:44 +08:00
|
|
|
// Set the top-down policy based on the state of the current top zone and
|
|
|
|
// the instructions outside the zone, including the bottom zone.
|
2016-06-25 08:23:00 +08:00
|
|
|
CandPolicy TopPolicy;
|
|
|
|
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
|
2012-11-07 15:05:09 +08:00
|
|
|
|
2016-06-25 10:03:36 +08:00
|
|
|
// See if BotCand is still valid (because we previously scheduled from Top).
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
|
2016-06-25 10:03:36 +08:00
|
|
|
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
|
|
|
|
BotCand.Policy != BotPolicy) {
|
|
|
|
BotCand.reset(CandPolicy());
|
|
|
|
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
|
|
|
|
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
|
|
|
|
} else {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(traceCandidate(BotCand));
|
2016-06-25 10:03:36 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
if (VerifyScheduling) {
|
|
|
|
SchedCandidate TCand;
|
|
|
|
TCand.reset(CandPolicy());
|
|
|
|
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
|
|
|
|
assert(TCand.SU == BotCand.SU &&
|
|
|
|
"Last pick result should correspond to re-picking right now");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
2012-05-18 02:35:10 +08:00
|
|
|
|
|
|
|
// Check if the top Q has a better candidate.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Picking from Top:\n");
|
2016-06-25 10:03:36 +08:00
|
|
|
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
|
|
|
|
TopCand.Policy != TopPolicy) {
|
|
|
|
TopCand.reset(CandPolicy());
|
|
|
|
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
|
|
|
|
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
|
|
|
|
} else {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(traceCandidate(TopCand));
|
2016-06-25 10:03:36 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
if (VerifyScheduling) {
|
|
|
|
SchedCandidate TCand;
|
|
|
|
TCand.reset(CandPolicy());
|
|
|
|
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
|
|
|
|
assert(TCand.SU == TopCand.SU &&
|
|
|
|
"Last pick result should correspond to re-picking right now");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pick best from BotCand and TopCand.
|
|
|
|
assert(BotCand.isValid());
|
|
|
|
assert(TopCand.isValid());
|
|
|
|
SchedCandidate Cand = BotCand;
|
|
|
|
TopCand.Reason = NoCand;
|
|
|
|
tryCandidate(Cand, TopCand, nullptr);
|
|
|
|
if (TopCand.Reason != NoCand) {
|
|
|
|
Cand.setBest(TopCand);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(traceCandidate(Cand));
|
2016-06-25 10:03:36 +08:00
|
|
|
}
|
2012-05-18 02:35:10 +08:00
|
|
|
|
2016-06-25 08:23:00 +08:00
|
|
|
IsTopNode = Cand.AtTop;
|
|
|
|
tracePick(Cand);
|
|
|
|
return Cand.SU;
|
2012-05-18 02:35:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
|
2013-09-20 07:10:59 +08:00
|
|
|
SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
|
2012-05-11 05:06:16 +08:00
|
|
|
if (DAG->top() == DAG->bottom()) {
|
2012-05-25 06:11:09 +08:00
|
|
|
assert(Top.Available.empty() && Top.Pending.empty() &&
|
|
|
|
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
|
2014-04-14 08:51:57 +08:00
|
|
|
return nullptr;
|
2012-05-11 05:06:16 +08:00
|
|
|
}
|
|
|
|
SUnit *SU;
|
2012-10-09 02:53:53 +08:00
|
|
|
do {
|
2013-09-07 01:32:34 +08:00
|
|
|
if (RegionPolicy.OnlyTopDown) {
|
2012-10-09 02:53:53 +08:00
|
|
|
SU = Top.pickOnlyChoice();
|
|
|
|
if (!SU) {
|
2012-11-07 15:05:09 +08:00
|
|
|
CandPolicy NoPolicy;
|
2016-06-25 10:03:36 +08:00
|
|
|
TopCand.reset(NoPolicy);
|
2016-06-25 08:23:00 +08:00
|
|
|
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
|
2013-09-05 05:00:13 +08:00
|
|
|
assert(TopCand.Reason != NoCand && "failed to find a candidate");
|
2016-06-25 08:23:00 +08:00
|
|
|
tracePick(TopCand);
|
2012-10-09 02:53:53 +08:00
|
|
|
SU = TopCand.SU;
|
|
|
|
}
|
|
|
|
IsTopNode = true;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else if (RegionPolicy.OnlyBottomUp) {
|
2012-10-09 02:53:53 +08:00
|
|
|
SU = Bot.pickOnlyChoice();
|
|
|
|
if (!SU) {
|
2012-11-07 15:05:09 +08:00
|
|
|
CandPolicy NoPolicy;
|
2016-06-25 10:03:36 +08:00
|
|
|
BotCand.reset(NoPolicy);
|
2016-06-25 08:23:00 +08:00
|
|
|
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
|
2013-09-05 05:00:13 +08:00
|
|
|
assert(BotCand.Reason != NoCand && "failed to find a candidate");
|
2016-06-25 08:23:00 +08:00
|
|
|
tracePick(BotCand);
|
2012-10-09 02:53:53 +08:00
|
|
|
SU = BotCand.SU;
|
|
|
|
}
|
|
|
|
IsTopNode = false;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2012-11-07 15:05:09 +08:00
|
|
|
SU = pickNodeBidirectional(IsTopNode);
|
2012-10-09 02:53:53 +08:00
|
|
|
}
|
|
|
|
} while (SU->isScheduled);
|
|
|
|
|
2012-05-25 06:11:09 +08:00
|
|
|
if (SU->isTopReady())
|
|
|
|
Top.removeReady(SU);
|
|
|
|
if (SU->isBottomReady())
|
|
|
|
Bot.removeReady(SU);
|
2012-05-25 10:02:39 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
|
|
|
|
<< *SU->getInstr());
|
2012-05-25 06:11:09 +08:00
|
|
|
return SU;
|
|
|
|
}
|
|
|
|
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
/// Move copies and move-immediates that are tied to \p SU through a physical
/// register data dependence next to \p SU's instruction.
///
/// \param SU the node whose dependences are inspected.
/// \param isTop when true the predecessors of \p SU are examined and the
///        insertion point is \p SU's instruction; otherwise the successors
///        are examined and the insertion point is the position after it.
void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  if (!isTop)
    ++InsertPos;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above the scheduled instruction.
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
  for (SDep &Dep : Deps) {
    // Only data dependences carried by a physical register qualify.
    if (Dep.getKind() != SDep::Data)
      continue;
    if (!Register::isPhysicalRegister(Dep.getReg()))
      continue;

    // Skip nodes with more than one edge on the side facing SU.
    SUnit *DepSU = Dep.getSUnit();
    const auto &FacingEdges = isTop ? DepSU->Succs : DepSU->Preds;
    if (FacingEdges.size() > 1)
      continue;

    MachineInstr *Copy = DepSU->getInstr();
    if (!(Copy->isCopy() || Copy->isMoveImmediate()))
      continue;

    LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
               DAG->dumpNode(*DepSU));
    DAG->moveInstruction(Copy, InsertPos);
  }
}
|
|
|
|
|
2012-05-25 06:11:09 +08:00
|
|
|
/// Update the scheduler's state after scheduling a node. This is the same node
|
2013-12-29 05:56:57 +08:00
|
|
|
/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
|
|
|
|
/// update it's state based on the current cycle before MachineSchedStrategy
|
|
|
|
/// does.
|
2013-04-13 14:07:40 +08:00
|
|
|
///
|
|
|
|
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
/// them here. See comments in biasPhysReg.
|
2013-09-20 07:10:59 +08:00
|
|
|
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
2012-06-06 05:11:27 +08:00
|
|
|
if (IsTopNode) {
|
2013-12-07 13:59:44 +08:00
|
|
|
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
|
2012-06-29 11:23:22 +08:00
|
|
|
Top.bumpNode(SU);
|
2013-04-13 14:07:40 +08:00
|
|
|
if (SU->hasPhysRegUses)
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
reschedulePhysReg(SU, true);
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2013-12-07 13:59:44 +08:00
|
|
|
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
|
2012-06-29 11:23:22 +08:00
|
|
|
Bot.bumpNode(SU);
|
2013-04-13 14:07:40 +08:00
|
|
|
if (SU->hasPhysRegDefs)
|
Bias physical register immediate assignments
The machine scheduler currently biases register copies to/from
physical registers to be closer to their point of use / def to
minimize their live ranges. This change extends this to also physical
register assignments from immediate values.
This causes a reduction in reduction in overall register pressure and
minor reduction in spills and indirectly fixes an out-of-registers
assertion (PR39391).
Most test changes are from minor instruction reorderings and register
name selection changes and direct consequences of that.
Reviewers: MatzeB, qcolombet, myatsina, pcc
Subscribers: nemanjai, jvesely, nhaehnle, eraman, hiraditya,
javed.absar, arphaman, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54218
llvm-svn: 346894
2018-11-15 05:11:53 +08:00
|
|
|
reschedulePhysReg(SU, false);
|
2012-05-11 05:06:16 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
/// Create the standard converging machine scheduler. This will be used as the
|
|
|
|
/// default scheduler if the target does not set a default.
|
2016-11-29 04:11:54 +08:00
|
|
|
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
|
2017-02-23 06:32:51 +08:00
|
|
|
ScheduleDAGMILive *DAG =
|
2019-08-15 23:54:37 +08:00
|
|
|
new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
|
2012-11-13 03:40:10 +08:00
|
|
|
// Register DAG post-processors.
|
2013-04-24 23:54:43 +08:00
|
|
|
//
|
|
|
|
// FIXME: extend the mutation API to allow earlier mutations to instantiate
|
|
|
|
// data and pass it to later mutations. Have a single mutation that gathers
|
|
|
|
// the interesting nodes in one pass.
|
2016-08-20 03:59:18 +08:00
|
|
|
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
|
2012-11-13 03:40:10 +08:00
|
|
|
return DAG;
|
2012-01-17 14:55:03 +08:00
|
|
|
}
|
2013-12-29 05:56:57 +08:00
|
|
|
|
2020-10-06 03:43:50 +08:00
|
|
|
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
|
2016-11-29 04:11:54 +08:00
|
|
|
return createGenericSchedLive(C);
|
|
|
|
}
|
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
static MachineSchedRegistry
|
2013-09-20 07:10:59 +08:00
|
|
|
GenericSchedRegistry("converge", "Standard converging scheduler.",
|
2020-10-06 03:43:50 +08:00
|
|
|
createConvergingSched);
|
2013-12-29 05:56:57 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-06-04 15:06:18 +08:00
|
|
|
/// Bind the strategy to \p Dag and reset the per-region scheduling state.
void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
  DAG = Dag;
  TRI = DAG->TRI;
  SchedModel = DAG->getSchedModel();

  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  BotRoots.clear();

  // An existing hazard recognizer is kept as is.
  if (Top.HazardRec)
    return;

  // Initialize the HazardRecognizer. If itineraries don't exist, are empty,
  // or are disabled, then the HazardRec will be disabled.
  const auto *InstrInfo = DAG->MF.getSubtarget().getInstrInfo();
  Top.HazardRec = InstrInfo->CreateTargetMIHazardRecognizer(
      SchedModel->getInstrItineraries(), DAG);
}
|
2013-12-29 05:56:57 +08:00
|
|
|
|
|
|
|
void PostGenericScheduler::registerRoots() {
|
|
|
|
Rem.CriticalPath = DAG->ExitSU.getDepth();
|
|
|
|
|
|
|
|
// Some roots may not feed into ExitSU. Check all of them in case.
|
2017-06-21 17:10:10 +08:00
|
|
|
for (const SUnit *SU : BotRoots) {
|
|
|
|
if (SU->getDepth() > Rem.CriticalPath)
|
|
|
|
Rem.CriticalPath = SU->getDepth();
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
|
2014-08-08 05:49:44 +08:00
|
|
|
if (DumpCriticalPathLength) {
|
|
|
|
errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
|
|
|
|
}
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
|
|
|
|
2018-06-20 13:29:26 +08:00
|
|
|
/// Apply a set of heuristics to a new candidate for PostRA scheduling.
|
2013-12-29 05:56:57 +08:00
|
|
|
///
|
|
|
|
/// \param Cand provides the policy and current best candidate.
|
|
|
|
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
|
|
|
|
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
|
|
|
|
SchedCandidate &TryCand) {
|
|
|
|
// Initialize the candidate if needed.
|
|
|
|
if (!Cand.isValid()) {
|
|
|
|
TryCand.Reason = NodeOrder;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prioritize instructions that read unbuffered resources by stall cycles.
|
|
|
|
if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
|
|
|
|
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
|
|
|
|
return;
|
[AArch64] Make instruction fusion more aggressive.
Summary:
This patch makes instruction fusion more aggressive by
* adding artificial edges between the successors of FirstSU and
SecondSU, similar to BaseMemOpClusterMutation::clusterNeighboringMemOps.
* updating PostGenericScheduler::tryCandidate to keep clusters together,
similar to GenericScheduler::tryCandidate.
This change increases the number of AES instruction pairs generated on
Cortex-A57 and Cortex-A72. This doesn't change code at all in
most benchmarks or general code, but we've seen improvement on kernels
using AESE/AESMC and AESD/AESIMC.
Reviewers: evandro, kristof.beyls, t.p.northover, silviu.baranga, atrick, rengolin, MatzeB
Reviewed By: evandro
Subscribers: aemerson, rengolin, MatzeB, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D33230
llvm-svn: 303618
2017-05-23 17:33:34 +08:00
|
|
|
|
|
|
|
// Keep clustered nodes together.
|
|
|
|
if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
|
|
|
|
Cand.SU == DAG->getNextClusterSucc(),
|
|
|
|
TryCand, Cand, Cluster))
|
|
|
|
return;
|
2013-12-29 05:56:57 +08:00
|
|
|
|
|
|
|
// Avoid critical resource consumption and balance the schedule.
|
|
|
|
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
|
|
|
|
TryCand, Cand, ResourceReduce))
|
|
|
|
return;
|
|
|
|
if (tryGreater(TryCand.ResDelta.DemandedResources,
|
|
|
|
Cand.ResDelta.DemandedResources,
|
|
|
|
TryCand, Cand, ResourceDemand))
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Avoid serializing long latency dependence chains.
|
|
|
|
if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fall through to original instruction order.
|
|
|
|
if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
|
|
|
|
TryCand.Reason = NodeOrder;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Run every node in the top Available queue through tryCandidate() and
/// leave the winner in \p Cand.
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  for (SUnit *Node : Top.Available) {
    SchedCandidate Contender(Cand.Policy);
    Contender.SU = Node;
    Contender.AtTop = true;
    Contender.initResourceDelta(DAG, SchedModel);

    tryCandidate(Cand, Contender);
    // A contender that was given no reason lost against the current best.
    if (Contender.Reason == NoCand)
      continue;

    Cand.setBest(Contender);
    LLVM_DEBUG(traceCandidate(Cand));
  }
}
|
|
|
|
|
|
|
|
/// Pick the next node to schedule.
|
|
|
|
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
|
|
|
|
if (DAG->top() == DAG->bottom()) {
|
|
|
|
assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
|
2014-04-14 08:51:57 +08:00
|
|
|
return nullptr;
|
2013-12-29 05:56:57 +08:00
|
|
|
}
|
|
|
|
SUnit *SU;
|
|
|
|
do {
|
|
|
|
SU = Top.pickOnlyChoice();
|
2016-05-28 06:14:26 +08:00
|
|
|
if (SU) {
|
|
|
|
tracePick(Only1, true);
|
|
|
|
} else {
|
2013-12-29 05:56:57 +08:00
|
|
|
CandPolicy NoPolicy;
|
|
|
|
SchedCandidate TopCand(NoPolicy);
|
|
|
|
// Set the top-down policy based on the state of the current top zone and
|
|
|
|
// the instructions outside the zone, including the bottom zone.
|
2014-04-14 08:51:57 +08:00
|
|
|
setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
|
2013-12-29 05:56:57 +08:00
|
|
|
pickNodeFromQueue(TopCand);
|
|
|
|
assert(TopCand.Reason != NoCand && "failed to find a candidate");
|
2016-06-25 08:23:00 +08:00
|
|
|
tracePick(TopCand);
|
2013-12-29 05:56:57 +08:00
|
|
|
SU = TopCand.SU;
|
|
|
|
}
|
|
|
|
} while (SU->isScheduled);
|
|
|
|
|
|
|
|
IsTopNode = true;
|
|
|
|
Top.removeReady(SU);
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
|
|
|
|
<< *SU->getInstr());
|
2013-12-29 05:56:57 +08:00
|
|
|
return SU;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Called after ScheduleDAGMI has scheduled an instruction and updated
|
|
|
|
/// scheduled/remaining flags in the DAG nodes.
|
|
|
|
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
|
|
|
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
|
|
|
|
Top.bumpNode(SU);
|
|
|
|
}
|
|
|
|
|
2016-11-29 04:11:54 +08:00
|
|
|
/// Build a ScheduleDAGMI driven by a PostGenericScheduler strategy, with
/// kill-flag removal enabled.
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
  auto Strategy = std::make_unique<PostGenericScheduler>(C);
  return new ScheduleDAGMI(C, std::move(Strategy), /*RemoveKillFlags=*/true);
}
|
2012-01-17 14:55:03 +08:00
|
|
|
|
2012-10-16 02:02:27 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// ILP Scheduler. Currently for experimental analysis of heuristics.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
namespace {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Order nodes by the ILP metric.
|
2012-10-16 02:02:27 +08:00
|
|
|
struct ILPOrder {
|
2017-02-23 06:32:51 +08:00
|
|
|
const SchedDFSResult *DFSResult = nullptr;
|
|
|
|
const BitVector *ScheduledTrees = nullptr;
|
2012-10-16 02:02:27 +08:00
|
|
|
bool MaximizeILP;
|
|
|
|
|
2017-02-23 06:32:51 +08:00
|
|
|
ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
|
2012-10-16 02:02:27 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Apply a less-than relation on node priority.
|
2012-11-28 13:13:28 +08:00
|
|
|
///
|
|
|
|
/// (Return true if A comes after B in the Q.)
|
2012-10-16 02:02:27 +08:00
|
|
|
bool operator()(const SUnit *A, const SUnit *B) const {
|
2012-11-28 13:13:28 +08:00
|
|
|
unsigned SchedTreeA = DFSResult->getSubtreeID(A);
|
|
|
|
unsigned SchedTreeB = DFSResult->getSubtreeID(B);
|
|
|
|
if (SchedTreeA != SchedTreeB) {
|
|
|
|
// Unscheduled trees have lower priority.
|
|
|
|
if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
|
|
|
|
return ScheduledTrees->test(SchedTreeB);
|
|
|
|
|
|
|
|
// Trees with shallower connections have have lower priority.
|
|
|
|
if (DFSResult->getSubtreeLevel(SchedTreeA)
|
|
|
|
!= DFSResult->getSubtreeLevel(SchedTreeB)) {
|
|
|
|
return DFSResult->getSubtreeLevel(SchedTreeA)
|
|
|
|
< DFSResult->getSubtreeLevel(SchedTreeB);
|
|
|
|
}
|
|
|
|
}
|
2012-10-16 02:02:27 +08:00
|
|
|
if (MaximizeILP)
|
2012-11-28 13:13:28 +08:00
|
|
|
return DFSResult->getILP(A) < DFSResult->getILP(B);
|
2012-10-16 02:02:27 +08:00
|
|
|
else
|
2012-11-28 13:13:28 +08:00
|
|
|
return DFSResult->getILP(A) > DFSResult->getILP(B);
|
2012-10-16 02:02:27 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Schedule based on the ILP metric.
|
2012-10-16 02:02:27 +08:00
|
|
|
class ILPScheduler : public MachineSchedStrategy {
|
2017-02-23 06:32:51 +08:00
|
|
|
ScheduleDAGMILive *DAG = nullptr;
|
2012-10-16 02:02:27 +08:00
|
|
|
ILPOrder Cmp;
|
|
|
|
|
|
|
|
std::vector<SUnit*> ReadyQ;
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-10-16 02:02:27 +08:00
|
|
|
public:
|
2017-02-23 06:32:51 +08:00
|
|
|
ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
|
2012-10-16 02:02:27 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void initialize(ScheduleDAGMI *dag) override {
|
2013-12-29 05:56:47 +08:00
|
|
|
assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
|
|
|
|
DAG = static_cast<ScheduleDAGMILive*>(dag);
|
2013-01-25 14:33:57 +08:00
|
|
|
DAG->computeDFSResult();
|
2013-01-25 12:01:04 +08:00
|
|
|
Cmp.DFSResult = DAG->getDFSResult();
|
|
|
|
Cmp.ScheduledTrees = &DAG->getScheduledTrees();
|
2012-10-16 02:02:27 +08:00
|
|
|
ReadyQ.clear();
|
|
|
|
}
|
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void registerRoots() override {
|
2012-11-29 22:36:26 +08:00
|
|
|
// Restore the heap in ReadyQ with the updated DFS results.
|
|
|
|
std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
2012-10-16 02:02:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement MachineSchedStrategy interface.
|
|
|
|
/// -----------------------------------------
|
|
|
|
|
2012-11-28 13:13:28 +08:00
|
|
|
/// Callback to select the highest priority node from the ready Q.
|
2014-03-07 17:26:03 +08:00
|
|
|
SUnit *pickNode(bool &IsTopNode) override {
|
2014-04-14 08:51:57 +08:00
|
|
|
if (ReadyQ.empty()) return nullptr;
|
2013-03-21 08:57:21 +08:00
|
|
|
std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
2012-10-16 02:02:27 +08:00
|
|
|
SUnit *SU = ReadyQ.back();
|
|
|
|
ReadyQ.pop_back();
|
|
|
|
IsTopNode = false;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Pick node "
|
|
|
|
<< "SU(" << SU->NodeNum << ") "
|
|
|
|
<< " ILP: " << DAG->getDFSResult()->getILP(SU)
|
|
|
|
<< " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
|
|
|
|
<< " @"
|
|
|
|
<< DAG->getDFSResult()->getSubtreeLevel(
|
|
|
|
DAG->getDFSResult()->getSubtreeID(SU))
|
|
|
|
<< '\n'
|
|
|
|
<< "Scheduling " << *SU->getInstr());
|
2012-10-16 02:02:27 +08:00
|
|
|
return SU;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Scheduler callback to notify that a new subtree is scheduled.
|
2014-03-07 17:26:03 +08:00
|
|
|
void scheduleTree(unsigned SubtreeID) override {
|
2013-01-25 12:01:04 +08:00
|
|
|
std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
|
|
|
}
|
|
|
|
|
2012-11-28 13:13:28 +08:00
|
|
|
/// Callback after a node is scheduled. Mark a newly scheduled tree, notify
|
|
|
|
/// DFSResults, and resort the priority Q.
|
2014-03-07 17:26:03 +08:00
|
|
|
void schedNode(SUnit *SU, bool IsTopNode) override {
|
2012-11-28 13:13:28 +08:00
|
|
|
assert(!IsTopNode && "SchedDFSResult needs bottom-up");
|
|
|
|
}
|
2012-10-16 02:02:27 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
|
2012-10-16 02:02:27 +08:00
|
|
|
|
2014-03-07 17:26:03 +08:00
|
|
|
void releaseBottomNode(SUnit *SU) override {
|
2012-10-16 02:02:27 +08:00
|
|
|
ReadyQ.push_back(SU);
|
|
|
|
std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
|
|
|
}
|
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end anonymous namespace
|
2012-10-16 02:02:27 +08:00
|
|
|
|
|
|
|
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
|
2019-08-15 23:54:37 +08:00
|
|
|
return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
|
2012-10-16 02:02:27 +08:00
|
|
|
}
|
|
|
|
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
|
2019-08-15 23:54:37 +08:00
|
|
|
return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
|
2012-10-16 02:02:27 +08:00
|
|
|
}
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-10-16 02:02:27 +08:00
|
|
|
static MachineSchedRegistry ILPMaxRegistry(
|
|
|
|
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
|
|
|
|
static MachineSchedRegistry ILPMinRegistry(
|
|
|
|
"ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
|
|
|
|
|
2012-01-14 10:17:06 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Machine Instruction Shuffler for Correctness Testing
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2012-01-13 14:30:30 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
namespace {
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
/// Apply a less-than relation on the node order, which corresponds to the
|
|
|
|
/// instruction order prior to scheduling. IsReverse implements greater-than.
|
|
|
|
template<bool IsReverse>
|
|
|
|
struct SUnitOrder {
|
2012-01-17 14:55:07 +08:00
|
|
|
bool operator()(SUnit *A, SUnit *B) const {
|
2012-03-14 12:00:41 +08:00
|
|
|
if (IsReverse)
|
|
|
|
return A->NodeNum > B->NodeNum;
|
|
|
|
else
|
|
|
|
return A->NodeNum < B->NodeNum;
|
2012-01-17 14:55:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2012-01-13 14:30:30 +08:00
|
|
|
/// Reorder instructions as much as possible.
|
2012-03-14 12:00:41 +08:00
|
|
|
class InstructionShuffler : public MachineSchedStrategy {
|
|
|
|
bool IsAlternating;
|
|
|
|
bool IsTopDown;
|
|
|
|
|
|
|
|
// Using a less-than relation (SUnitOrder<false>) for the TopQ priority
|
|
|
|
// gives nodes with a higher number higher priority causing the latest
|
|
|
|
// instructions to be scheduled first.
|
2017-02-23 06:32:51 +08:00
|
|
|
PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
|
2012-03-14 12:00:41 +08:00
|
|
|
TopQ;
|
2017-09-12 07:00:48 +08:00
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
// When scheduling bottom-up, use greater-than as the queue priority.
|
2017-02-23 06:32:51 +08:00
|
|
|
PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
|
2012-03-14 12:00:41 +08:00
|
|
|
BottomQ;
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-01-13 14:30:30 +08:00
|
|
|
public:
|
2012-03-14 12:00:41 +08:00
|
|
|
InstructionShuffler(bool alternate, bool topdown)
|
|
|
|
: IsAlternating(alternate), IsTopDown(topdown) {}
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2014-04-29 15:58:41 +08:00
|
|
|
void initialize(ScheduleDAGMI*) override {
|
2012-03-14 12:00:41 +08:00
|
|
|
TopQ.clear();
|
|
|
|
BottomQ.clear();
|
|
|
|
}
|
2012-01-17 14:55:07 +08:00
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
/// Implement MachineSchedStrategy interface.
|
|
|
|
/// -----------------------------------------
|
|
|
|
|
2014-04-29 15:58:41 +08:00
|
|
|
SUnit *pickNode(bool &IsTopNode) override {
|
2012-03-14 12:00:41 +08:00
|
|
|
SUnit *SU;
|
|
|
|
if (IsTopDown) {
|
|
|
|
do {
|
2014-04-14 08:51:57 +08:00
|
|
|
if (TopQ.empty()) return nullptr;
|
2012-03-14 12:00:41 +08:00
|
|
|
SU = TopQ.top();
|
|
|
|
TopQ.pop();
|
|
|
|
} while (SU->isScheduled);
|
|
|
|
IsTopNode = true;
|
2016-04-21 09:54:13 +08:00
|
|
|
} else {
|
2012-03-14 12:00:41 +08:00
|
|
|
do {
|
2014-04-14 08:51:57 +08:00
|
|
|
if (BottomQ.empty()) return nullptr;
|
2012-03-14 12:00:41 +08:00
|
|
|
SU = BottomQ.top();
|
|
|
|
BottomQ.pop();
|
|
|
|
} while (SU->isScheduled);
|
|
|
|
IsTopNode = false;
|
|
|
|
}
|
|
|
|
if (IsAlternating)
|
|
|
|
IsTopDown = !IsTopDown;
|
2012-01-17 14:55:07 +08:00
|
|
|
return SU;
|
|
|
|
}
|
|
|
|
|
2014-04-29 15:58:41 +08:00
|
|
|
void schedNode(SUnit *SU, bool IsTopNode) override {}
|
2012-05-25 06:11:09 +08:00
|
|
|
|
2014-04-29 15:58:41 +08:00
|
|
|
void releaseTopNode(SUnit *SU) override {
|
2012-03-14 12:00:41 +08:00
|
|
|
TopQ.push(SU);
|
|
|
|
}
|
2014-04-29 15:58:41 +08:00
|
|
|
void releaseBottomNode(SUnit *SU) override {
|
2012-03-14 12:00:41 +08:00
|
|
|
BottomQ.push(SU);
|
2012-01-13 14:30:30 +08:00
|
|
|
}
|
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end anonymous namespace
|
2012-01-13 14:30:30 +08:00
|
|
|
|
2012-03-08 09:41:12 +08:00
|
|
|
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
|
2012-03-14 12:00:41 +08:00
|
|
|
bool Alternate = !ForceTopDown && !ForceBottomUp;
|
|
|
|
bool TopDown = !ForceBottomUp;
|
2012-03-14 19:26:37 +08:00
|
|
|
assert((TopDown || !ForceTopDown) &&
|
2012-03-14 12:00:41 +08:00
|
|
|
"-misched-topdown incompatible with -misched-bottomup");
|
2017-02-23 06:32:51 +08:00
|
|
|
return new ScheduleDAGMILive(
|
2019-08-15 23:54:37 +08:00
|
|
|
C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
|
2012-01-13 14:30:30 +08:00
|
|
|
}
|
2017-02-23 06:32:51 +08:00
|
|
|
|
2012-03-14 12:00:41 +08:00
|
|
|
static MachineSchedRegistry ShufflerRegistry(
|
|
|
|
"shuffle", "Shuffle machine instructions alternating directions",
|
|
|
|
createInstructionShuffler);
|
2012-01-13 14:30:30 +08:00
|
|
|
#endif // !NDEBUG
|
2013-01-25 15:45:29 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2013-12-29 05:56:47 +08:00
|
|
|
// GraphWriter support for ScheduleDAGMILive.
|
2013-01-25 15:45:29 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
template<> struct GraphTraits<
|
|
|
|
ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
|
|
|
|
|
|
|
|
template<>
|
|
|
|
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
|
2017-02-23 06:32:51 +08:00
|
|
|
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
|
2013-01-25 15:45:29 +08:00
|
|
|
|
|
|
|
static std::string getGraphName(const ScheduleDAG *G) {
|
2020-01-29 03:23:46 +08:00
|
|
|
return std::string(G->MF.getName());
|
2013-01-25 15:45:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool renderGraphFromBottomUp() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isNodeHidden(const SUnit *Node) {
|
2015-09-18 05:09:59 +08:00
|
|
|
if (ViewMISchedCutoff == 0)
|
|
|
|
return false;
|
|
|
|
return (Node->Preds.size() > ViewMISchedCutoff
|
|
|
|
|| Node->Succs.size() > ViewMISchedCutoff);
|
2013-01-25 15:45:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// If you want to override the dot attributes printed for a particular
|
|
|
|
/// edge, override this method.
|
|
|
|
static std::string getEdgeAttributes(const SUnit *Node,
|
|
|
|
SUnitIterator EI,
|
|
|
|
const ScheduleDAG *Graph) {
|
|
|
|
if (EI.isArtificialDep())
|
|
|
|
return "color=cyan,style=dashed";
|
|
|
|
if (EI.isCtrlDep())
|
|
|
|
return "color=blue,style=dashed";
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
|
2014-06-27 06:52:05 +08:00
|
|
|
std::string Str;
|
|
|
|
raw_string_ostream SS(Str);
|
2013-12-29 05:56:47 +08:00
|
|
|
const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
|
|
|
|
const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
|
2014-04-14 08:51:57 +08:00
|
|
|
static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
|
2013-09-07 01:32:42 +08:00
|
|
|
SS << "SU:" << SU->NodeNum;
|
|
|
|
if (DFS)
|
|
|
|
SS << " I:" << DFS->getNumInstrs(SU);
|
2013-01-25 15:45:29 +08:00
|
|
|
return SS.str();
|
|
|
|
}
|
2017-09-12 07:00:48 +08:00
|
|
|
|
2013-01-25 15:45:29 +08:00
|
|
|
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
|
|
|
|
return G->getGraphNodeLabel(SU);
|
|
|
|
}
|
|
|
|
|
2013-12-29 05:56:47 +08:00
|
|
|
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
|
2013-01-25 15:45:29 +08:00
|
|
|
std::string Str("shape=Mrecord");
|
2013-12-29 05:56:47 +08:00
|
|
|
const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
|
|
|
|
const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
|
2014-04-14 08:51:57 +08:00
|
|
|
static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
|
2013-01-25 15:45:29 +08:00
|
|
|
if (DFS) {
|
|
|
|
Str += ",style=filled,fillcolor=\"#";
|
|
|
|
Str += DOT::getColorString(DFS->getSubtreeID(N));
|
|
|
|
Str += '"';
|
|
|
|
}
|
|
|
|
return Str;
|
|
|
|
}
|
|
|
|
};
|
2017-02-23 06:32:51 +08:00
|
|
|
|
|
|
|
} // end namespace llvm
|
2013-01-25 15:45:29 +08:00
|
|
|
#endif // NDEBUG
|
|
|
|
|
|
|
|
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
|
|
|
|
/// rendered using 'dot'.
|
|
|
|
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
ViewGraph(this, Name, false, Title);
|
|
|
|
#else
|
|
|
|
errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
|
|
|
|
<< "systems with Graphviz or gv!\n";
|
|
|
|
#endif // NDEBUG
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Out-of-line implementation with no arguments is handy for gdb.
|
|
|
|
void ScheduleDAGMI::viewGraph() {
|
|
|
|
viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
|
|
|
|
}
|