2017-06-07 06:22:41 +08:00
|
|
|
//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
|
2010-12-08 11:26:16 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2010-12-08 11:26:16 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file defines the RAGreedy function pass for register allocation in
|
|
|
|
// optimized builds.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2021-12-22 00:44:01 +08:00
|
|
|
#include "RegAllocGreedy.h"
|
2010-12-11 06:21:05 +08:00
|
|
|
#include "AllocationOrder.h"
|
2011-04-02 14:03:35 +08:00
|
|
|
#include "InterferenceCache.h"
|
2011-04-06 05:40:37 +08:00
|
|
|
#include "LiveDebugVariables.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "RegAllocBase.h"
|
2021-11-16 03:42:04 +08:00
|
|
|
#include "RegAllocEvictionAdvisor.h"
|
2011-01-19 05:13:27 +08:00
|
|
|
#include "SpillPlacement.h"
|
2010-12-16 07:46:13 +08:00
|
|
|
#include "SplitKit.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/BitVector.h"
|
|
|
|
#include "llvm/ADT/IndexedMap.h"
|
|
|
|
#include "llvm/ADT/SetVector.h"
|
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/SmallSet.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2011-02-18 06:53:48 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
2017-10-10 07:19:02 +08:00
|
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/CodeGen/CalcSpillWeights.h"
|
2011-01-19 05:13:27 +08:00
|
|
|
#include "llvm/CodeGen/EdgeBundles.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/LiveInterval.h"
|
|
|
|
#include "llvm/CodeGen/LiveIntervalUnion.h"
|
2017-12-13 10:51:04 +08:00
|
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
2012-04-03 06:44:18 +08:00
|
|
|
#include "llvm/CodeGen/LiveRangeEdit.h"
|
2012-11-29 03:13:06 +08:00
|
|
|
#include "llvm/CodeGen/LiveRegMatrix.h"
|
2017-12-19 07:19:44 +08:00
|
|
|
#include "llvm/CodeGen/LiveStacks.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2013-06-18 03:00:36 +08:00
|
|
|
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
2010-12-18 07:16:32 +08:00
|
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
2017-01-26 07:20:33 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
2017-01-26 07:20:33 +08:00
|
|
|
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#include "llvm/CodeGen/RegAllocRegistry.h"
|
2014-01-03 06:47:22 +08:00
|
|
|
#include "llvm/CodeGen/RegisterClassInfo.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/CodeGen/SlotIndexes.h"
|
2020-03-09 00:36:29 +08:00
|
|
|
#include "llvm/CodeGen/Spiller.h"
|
2017-11-08 09:01:31 +08:00
|
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
|
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/CodeGen/VirtRegMap.h"
|
2021-11-16 03:42:04 +08:00
|
|
|
#include "llvm/IR/DebugInfoMetadata.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
2014-04-04 10:05:21 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2022-03-15 17:54:19 +08:00
|
|
|
#include "llvm/InitializePasses.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include "llvm/Support/BlockFrequency.h"
|
2014-04-09 03:18:56 +08:00
|
|
|
#include "llvm/Support/BranchProbability.h"
|
2011-07-27 07:41:46 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2010-12-08 11:26:16 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2010-12-11 08:19:56 +08:00
|
|
|
#include "llvm/Support/Timer.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-06-07 06:22:41 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <utility>
|
2011-02-23 07:01:52 +08:00
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:02:50 +08:00
|
|
|
#define DEBUG_TYPE "regalloc"
|
|
|
|
|
2011-02-18 06:53:48 +08:00
|
|
|
STATISTIC(NumGlobalSplits, "Number of split global live ranges");
|
|
|
|
STATISTIC(NumLocalSplits, "Number of split local live ranges");
|
|
|
|
STATISTIC(NumEvicted, "Number of interferences evicted");
|
|
|
|
|
2016-04-13 11:08:27 +08:00
|
|
|
// -split-spill-mode: spill mode used when splitting live ranges.
// Accepts "default", "size" or "speed"; initialized to SM_Speed.
static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(
        clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
        clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
        clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));
|
Add an interface for SplitKit complement spill modes.
SplitKit always computes a complement live range to cover the places
where the original live range was live, but no explicit region has been
allocated.
Currently, the complement live range is created to be as small as
possible - it never overlaps any of the regions. This minimizes
register pressure, but if the complement is going to be spilled anyway,
that is not very important. The spiller will eliminate redundant
spills, and hoist others by making the spill slot live range overlap
some of the regions created by splitting. Stack slots are cheap.
This patch adds the interface to enable spill modes in SplitKit. In
spill mode, SplitKit will assume that the complement is going to spill,
so it will allow it to overlap regions in order to avoid back-copies.
By doing some of the spiller's work early, the complement live range
becomes simpler. In some cases, it can become much simpler because no
extra PHI-defs are required. This will speed up both splitting and
spilling.
This is only the interface to enable spill modes, no implementation yet.
llvm-svn: 139500
2011-09-13 00:49:21 +08:00
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
// -lcr-max-depth: depth cutoff for last chance recoloring (initially 5).
static cl::opt<unsigned> LastChanceRecoloringMaxDepth(
    "lcr-max-depth", cl::Hidden,
    cl::desc("Last chance recoloring max depth"), cl::init(5));

// -lcr-max-interf: cutoff on the number of interferences last chance
// recoloring considers at a time (initially 8).
static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));
|
|
|
|
|
2017-12-01 08:53:10 +08:00
|
|
|
// -exhaustive-register-search: bypass the depth and interference cutoffs of
// last chance recoloring.
//
// The option is hidden. Option modifiers are applied in argument order, so
// the cl::NotHidden that used to precede the description was always
// overridden by the trailing cl::Hidden; only the effective flag is kept.
static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search",
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);
|
2014-04-12 05:39:44 +08:00
|
|
|
|
2015-07-18 07:04:06 +08:00
|
|
|
// -enable-deferred-spilling: off by default. See the description string for
// the intended effect.
static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));
|
|
|
|
|
2014-03-25 08:16:25 +08:00
|
|
|
// FIXME: Find a good default for this flag and remove the flag.
|
|
|
|
static cl::opt<unsigned>
|
|
|
|
CSRFirstTimeCost("regalloc-csr-first-time-cost",
|
|
|
|
cl::desc("Cost for first time use of callee-saved register."),
|
|
|
|
cl::init(0), cl::Hidden);
|
|
|
|
|
2022-04-05 15:10:00 +08:00
|
|
|
// -grow-region-complexity-budget: work budget for growRegion() before it
// bails out (initially 10000).
static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);
|
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
// Registers this allocator under the name "greedy", using
// createGreedyRegisterAllocator as its factory.
static RegisterRegAlloc greedyRegAlloc(
    "greedy", "greedy register allocator", createGreedyRegisterAllocator);
|
|
|
|
|
|
|
|
char RAGreedy::ID = 0;
|
2016-11-15 05:50:13 +08:00
|
|
|
char &llvm::RAGreedyID = RAGreedy::ID;
|
|
|
|
|
|
|
|
// Pass registration for "greedy", listing the passes it depends on.
INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", "Greedy Register Allocator", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy", "Greedy Register Allocator", false,
                    false)
|
2010-12-08 11:26:16 +08:00
|
|
|
|
2011-05-26 07:58:36 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
const char *const RAGreedy::StageName[] = {
|
2011-07-25 23:25:41 +08:00
|
|
|
"RS_New",
|
|
|
|
"RS_Assign",
|
|
|
|
"RS_Split",
|
2011-07-25 23:25:43 +08:00
|
|
|
"RS_Split2",
|
2011-07-25 23:25:41 +08:00
|
|
|
"RS_Spill",
|
2015-07-18 07:04:06 +08:00
|
|
|
"RS_Memory",
|
2011-07-25 23:25:41 +08:00
|
|
|
"RS_Done"
|
2011-05-26 07:58:36 +08:00
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2011-04-23 06:47:40 +08:00
|
|
|
// Hysteresis to use when comparing floats.
|
|
|
|
// This helps stabilize decisions based on float comparisons.
|
2014-02-04 14:29:38 +08:00
|
|
|
const float Hysteresis = 2007.0f / 2048.0f; // exactly 0.97998046875
|
2011-04-23 06:47:40 +08:00
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
/// Create a greedy register allocator pass constructed with RAGreedy's
/// default arguments. The returned pass is heap-allocated.
FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}
|
|
|
|
|
RegAlloc: Allow targets to split register allocation
AMDGPU normally spills SGPRs to VGPRs. Previously, since all register
classes are handled at the same time, this was problematic. We don't
know ahead of time how many registers will be needed to be reserved to
handle the spilling. If no VGPRs were left for spilling, we would have
to try to spill to memory. If the spilled SGPRs were required for exec
mask manipulation, it is highly problematic because the lanes active
at the point of spill are not necessarily the same as at the restore
point.
Avoid this problem by fully allocating SGPRs in a separate regalloc
run from VGPRs. This way we know the exact number of VGPRs needed, and
can reserve them for a second run. This fixes the most serious
issues, but it is still possible using inline asm to make all VGPRs
unavailable. Start erroring in the case where we ever would require
memory for an SGPR spill.
This is implemented by giving each regalloc pass a callback which
reports if a register class should be handled or not. A few passes
need some small changes to deal with leftover virtual registers.
In the AMDGPU implementation, a new pass is introduced to take the
place of PrologEpilogInserter for SGPR spills emitted during the first
run.
One disadvantage of this is currently StackSlotColoring is no longer
used for SGPR spills. It would need to be run again, which will
require more work.
Error if the standard -regalloc option is used. Introduce new separate
-sgpr-regalloc and -vgpr-regalloc flags, so the two runs can be
controlled individually. PBQP is not currently supported, so this also
prevents using the unhandled allocator.
2018-09-27 07:36:28 +08:00
|
|
|
namespace llvm {
|
|
|
|
FunctionPass* createGreedyRegisterAllocator(
|
|
|
|
std::function<bool(const TargetRegisterInfo &TRI,
|
|
|
|
const TargetRegisterClass &RC)> Ftor);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a greedy register allocator pass that is constructed with the
/// register class filter \p Ftor.
///
/// \param Ftor predicate over (TargetRegisterInfo, TargetRegisterClass); it
///        is forwarded to the RAGreedy constructor.
/// \returns a newly heap-allocated RAGreedy pass.
FunctionPass *llvm::createGreedyRegisterAllocator(
    std::function<bool(const TargetRegisterInfo &TRI,
                       const TargetRegisterClass &RC)>
        Ftor) {
  // Ftor is our own by-value copy; move it into the pass rather than copying
  // the std::function a second time.
  return new RAGreedy(std::move(Ftor));
}
|
|
|
|
|
|
|
|
/// Construct the pass with register class filter \p F.
///
/// \p F is received by value, so it is moved into the RegAllocBase base
/// rather than copied again.
RAGreedy::RAGreedy(RegClassFilterFunc F)
    : MachineFunctionPass(ID), RegAllocBase(std::move(F)) {}
|
|
|
|
|
|
|
|
/// Declare the analyses this pass requires and the ones it preserves, then
/// defer to MachineFunctionPass for the common part.
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();

  // Analyses that are both required and preserved.
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();

  // Analyses that are required but not marked as preserved here.
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();

  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
|
2011-03-09 08:57:29 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// LiveRangeEdit delegate methods
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2020-11-03 12:07:58 +08:00
|
|
|
/// LiveRangeEdit delegate hook.
///
/// \returns true if \p VirtReg had a physical register assigned (it is then
/// unassigned from the matrix and aboutToRemoveInterval() is notified);
/// false otherwise, in which case its live interval is only cleared.
bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &Interval = LIS->getInterval(VirtReg);

  if (!VRM->hasPhys(VirtReg)) {
    // Unassigned virtreg is probably in the priority queue; RegAllocBase
    // will erase it after dequeueing. Still clear the live range so that the
    // debug dump shows the right state for this register.
    Interval.clear();
    return false;
  }

  Matrix->unassign(Interval);
  aboutToRemoveInterval(Interval);
  return true;
}
|
2011-03-09 08:57:29 +08:00
|
|
|
|
2020-11-03 12:07:58 +08:00
|
|
|
/// LiveRangeEdit delegate hook. If \p VirtReg currently has a physical
/// register, unassign it and put it back on the queue for reassignment;
/// otherwise do nothing.
void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    // Register is assigned: release it and re-enqueue its interval.
    LiveInterval &Interval = LIS->getInterval(VirtReg);
    Matrix->unassign(Interval);
    RegAllocBase::enqueue(&Interval);
  }
}
|
|
|
|
|
2020-11-03 12:07:58 +08:00
|
|
|
/// LiveRangeEdit delegate hook: forwards the (New, Old) clone notification
/// to ExtraInfo.
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}
|
|
|
|
|
2022-01-06 01:22:02 +08:00
|
|
|
/// Record that \p New was cloned from \p Old.
///
/// If \p Old is not tracked in Info, the clone is ignored. Otherwise \p Old
/// is put back into RS_Assign and \p New receives a copy of Old's updated
/// info.
void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Only act on parents we already know about.
  if (Info.inBounds(Old)) {
    // LRE may clone a virtual register because dead code elimination causes
    // it to be split into connected components. The new components are much
    // smaller than the original, so both the parent and the clone get a new
    // chance at being assigned.
    Info[Old].Stage = RS_Assign;
    Info.grow(New.id());
    Info[New] = Info[Old];
  }
}
|
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
void RAGreedy::releaseMemory() {
|
2014-07-19 09:05:11 +08:00
|
|
|
SpillerInstance.reset();
|
2011-04-13 05:30:53 +08:00
|
|
|
GlobalCand.clear();
|
2010-12-08 11:26:16 +08:00
|
|
|
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Enqueue \p LI on the allocator's own Queue.
void RAGreedy::enqueueImpl(const LiveInterval *LI) {
  enqueue(Queue, LI);
}
|
2014-02-06 06:13:59 +08:00
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Compute a priority for \p LI and push it onto \p CurQueue.
///
/// The queue entry is the pair (priority, ~vreg). The priority depends on the
/// live range stage:
///  - RS_Split:  the range size.
///  - RS_Memory: a running counter.
///  - otherwise: an instruction-order or size based value combined with the
///    register class allocation priority (shifted to bit 24), bit 31, and
///    bit 30 when the register has a known preference.
/// A register still in RS_New is moved to RS_Assign first.
void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  const unsigned Size = LI->getSize();
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Prio = 0;
  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred
    // until everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last. Change the priority such
    // that memory operands are assigned in the reverse order that they came
    // in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    const bool ForceGlobal =
        !ReverseLocal &&
        (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));

    const bool UseLocalOrder = Stage == RS_Assign && !ForceGlobal &&
                               !LI->empty() && LIS->intervalIsInOneMBB(*LI);
    if (UseLocalOrder) {
      // Allocate original local ranges in linear instruction order. Since
      // they are singly defined, this produces optimal coloring in the
      // absence of global interference and other constraints.
      //
      // With ReverseLocal, allocate bottom up instead: that may allow many
      // short LRGs to be assigned first to one of the cheap registers, which
      // could be much faster for very large blocks on targets with many
      // physical registers.
      Prio = ReverseLocal
                 ? Indexes->getZeroIndex().getInstrDistance(LI->endIndex())
                 : LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
    } else {
      // Allocate global and split ranges in long->short order. Long ranges
      // that don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }

    // Both orders fold in the register class allocation priority.
    Prio |= RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Dequeue from the allocator's own queue by forwarding to dequeue(PQueue &).
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
|
2014-02-06 06:13:59 +08:00
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Pop the top entry of \p CurQueue and return the live interval it names.
///
/// The second field of a queue entry is bitwise-complemented before it is
/// handed to LIS->getInterval().
///
/// \return The interval for the top entry, or nullptr when \p CurQueue is
///         empty.
const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  const LiveInterval *Next = nullptr;
  if (!CurQueue.empty()) {
    const auto EncodedReg = CurQueue.top().second;
    Next = &LIS->getInterval(~EncodedReg);
    CurQueue.pop();
  }
  return Next;
}
|
2010-12-23 06:01:30 +08:00
|
|
|
|
2011-04-21 02:19:48 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Direct Assignment
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// tryAssign - Try to assign VirtReg to an available register.
|
2022-02-04 01:07:42 +08:00
|
|
|
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
|
|
|
|
AllocationOrder &Order,
|
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
|
|
|
const SmallVirtRegSet &FixedRegisters) {
|
2021-02-27 01:54:20 +08:00
|
|
|
MCRegister PhysReg;
|
2020-09-24 12:58:45 +08:00
|
|
|
for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
|
|
|
|
assert(*I);
|
|
|
|
if (!Matrix->checkInterference(VirtReg, *I)) {
|
|
|
|
if (I.isHint())
|
|
|
|
return *I;
|
|
|
|
else
|
|
|
|
PhysReg = *I;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!PhysReg.isValid())
|
2011-04-21 02:19:48 +08:00
|
|
|
return PhysReg;
|
|
|
|
|
2011-07-09 04:46:18 +08:00
|
|
|
// PhysReg is available, but there may be a better choice.
|
|
|
|
|
|
|
|
// If we missed a simple hint, try to cheaply evict interference from the
|
|
|
|
// preferred register.
|
2020-09-16 05:54:38 +08:00
|
|
|
if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
|
2012-06-21 06:52:26 +08:00
|
|
|
if (Order.isHint(Hint)) {
|
2020-10-23 01:30:30 +08:00
|
|
|
MCRegister PhysHint = Hint.asMCReg();
|
|
|
|
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
|
2021-11-17 03:26:37 +08:00
|
|
|
|
2021-12-14 14:49:57 +08:00
|
|
|
if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
|
|
|
|
FixedRegisters)) {
|
2020-10-23 01:30:30 +08:00
|
|
|
evictInterference(VirtReg, PhysHint, NewVRegs);
|
|
|
|
return PhysHint;
|
2011-07-09 04:46:18 +08:00
|
|
|
}
|
2016-11-16 09:07:12 +08:00
|
|
|
// Record the missed hint, we may be able to recover
|
|
|
|
// at the end if the surrounding allocation changed.
|
|
|
|
SetOfBrokenHints.insert(&VirtReg);
|
2011-07-09 04:46:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Try to evict interference from a cheaper alternative.
|
2020-12-23 13:21:13 +08:00
|
|
|
uint8_t Cost = RegCosts[PhysReg];
|
2011-04-21 02:19:48 +08:00
|
|
|
|
|
|
|
// Most registers have 0 additional cost.
|
|
|
|
if (!Cost)
|
|
|
|
return PhysReg;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
|
2021-09-23 23:48:23 +08:00
|
|
|
<< (unsigned)Cost << '\n');
|
2021-02-27 01:54:20 +08:00
|
|
|
MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
|
2011-04-21 02:19:48 +08:00
|
|
|
return CheapReg ? CheapReg : PhysReg;
|
|
|
|
}
|
|
|
|
|
2011-02-23 08:29:52 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Interference eviction
|
|
|
|
//===----------------------------------------------------------------------===//
|
2011-02-09 09:14:03 +08:00
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Look for a register other than \p PrevReg that \p VirtReg could use.
///
/// Walks the allocation order created for VirtReg and picks the first
/// register, different from \p PrevReg, for which no register unit reports
/// interference with VirtReg.
///
/// \param VirtReg Live range to be reassigned.
/// \param PrevReg Register to exclude from the search.
/// \return The register found, or an invalid register when there is none.
Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                              Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister Replacement;
  for (auto It = Order.begin(), OrderEnd = Order.end();
       It != OrderEnd && !Replacement; ++It) {
    const MCRegister Candidate = *It;
    if (Candidate.id() == PrevReg.id())
      continue;

    bool HasInterference = false;
    for (MCRegUnitIterator Units(Candidate, TRI); Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query UnitQuery(VirtReg,
                                         Matrix->getLiveUnions()[*Units]);
      if (UnitQuery.checkInterference()) {
        HasInterference = true;
        break;
      }
    }

    // Every unit is free of interference: the search ends with Candidate.
    if (!HasInterference)
      Replacement = Candidate;
  }

  if (Replacement) {
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(Replacement, TRI) << '\n');
  }
  return Replacement;
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Return true if all interferences between VirtReg and PhysReg between
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// Start and End can be evicted.
|
|
|
|
///
|
|
|
|
/// \param VirtReg Live range that is about to be assigned.
|
|
|
|
/// \param PhysReg Desired register for assignment.
|
|
|
|
/// \param Start Start of range to look for interferences.
|
|
|
|
/// \param End End of range to look for interferences.
|
|
|
|
/// \param MaxCost Only look for cheaper candidates and update with new cost
|
|
|
|
/// when returning true.
|
|
|
|
/// \return True when interference can be evicted cheaper than MaxCost.
|
2021-03-09 12:55:53 +08:00
|
|
|
bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
|
2020-10-23 01:30:30 +08:00
|
|
|
MCRegister PhysReg, SlotIndex Start,
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
SlotIndex End,
|
2021-02-27 01:54:20 +08:00
|
|
|
EvictionCost &MaxCost) const {
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
EvictionCost Cost;
|
|
|
|
|
|
|
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
|
|
|
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
|
|
|
|
|
|
|
|
// Check if any interfering live range is heavier than MaxWeight.
|
2020-09-22 05:27:23 +08:00
|
|
|
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
// Check if interference overlast the segment in interest.
|
|
|
|
if (!Intf->overlaps(Start, End))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Never evict spill products. They cannot split or spill.
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getStage(*Intf) == RS_Done)
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Would this break a satisfied hint?
|
2020-09-16 05:54:38 +08:00
|
|
|
bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
// Update eviction cost.
|
|
|
|
Cost.BrokenHints += BreaksHint;
|
2020-09-16 05:54:38 +08:00
|
|
|
Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
// Abort if this would be too expensive.
|
|
|
|
if (!(Cost < MaxCost))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Cost.MaxWeight == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
MaxCost = Cost;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-06-20 13:29:26 +08:00
|
|
|
/// Return the physical register that will be best
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// candidate for eviction by a local split interval that will be created
|
|
|
|
/// between Start and End.
|
|
|
|
///
|
|
|
|
/// \param Order The allocation order
|
|
|
|
/// \param VirtReg Live range that is about to be assigned.
|
|
|
|
/// \param Start Start of range to look for interferences
|
|
|
|
/// \param End End of range to look for interferences
|
|
|
|
/// \param BestEvictweight The eviction cost of that eviction
|
|
|
|
/// \return The PhysReg which is the best candidate for eviction and the
|
|
|
|
/// eviction cost in BestEvictweight
|
2020-11-03 12:07:58 +08:00
|
|
|
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
|
2021-03-09 12:55:53 +08:00
|
|
|
const LiveInterval &VirtReg,
|
2020-11-03 12:07:58 +08:00
|
|
|
SlotIndex Start, SlotIndex End,
|
2021-03-09 12:55:53 +08:00
|
|
|
float *BestEvictweight) const {
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
EvictionCost BestEvictCost;
|
|
|
|
BestEvictCost.setMax();
|
2020-09-16 05:54:38 +08:00
|
|
|
BestEvictCost.MaxWeight = VirtReg.weight();
|
2020-11-03 12:07:58 +08:00
|
|
|
MCRegister BestEvicteePhys;
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
|
|
|
// Go over all physical registers and find the best candidate for eviction
|
2020-11-03 12:07:58 +08:00
|
|
|
for (MCRegister PhysReg : Order.getOrder()) {
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
|
|
|
if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
|
|
|
|
BestEvictCost))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Best so far.
|
|
|
|
BestEvicteePhys = PhysReg;
|
|
|
|
}
|
|
|
|
*BestEvictweight = BestEvictCost.MaxWeight;
|
|
|
|
return BestEvicteePhys;
|
|
|
|
}
|
|
|
|
|
2011-07-09 04:46:18 +08:00
|
|
|
/// evictInterference - Evict any interfering registers that prevent VirtReg
|
|
|
|
/// from being assigned to Physreg. This assumes that canEvictInterference
|
|
|
|
/// returned true.
|
2022-02-04 01:07:42 +08:00
|
|
|
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
|
|
|
|
MCRegister PhysReg,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs) {
|
2021-03-17 03:41:26 +08:00
|
|
|
// Make sure that VirtReg has a cascade number, and assign that cascade
|
2011-07-09 04:46:18 +08:00
|
|
|
// number to every evicted register. These live ranges than then only be
|
|
|
|
// evicted by a newer cascade, preventing infinite loops.
|
2021-12-01 08:30:16 +08:00
|
|
|
unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
|
2011-07-09 04:46:18 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
|
|
|
|
<< " interference: Cascade " << Cascade << '\n');
|
2012-06-21 06:52:26 +08:00
|
|
|
|
|
|
|
// Collect all interfering virtregs first.
|
2022-02-04 01:07:42 +08:00
|
|
|
SmallVector<const LiveInterval *, 8> Intfs;
|
2012-06-21 06:52:26 +08:00
|
|
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
|
|
|
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
|
2017-03-04 07:27:20 +08:00
|
|
|
// We usually have the interfering VRegs cached so collectInterferingVRegs()
|
|
|
|
// should be fast, we may need to recalculate if when different physregs
|
|
|
|
// overlap the same register unit so we had different SubRanges queried
|
|
|
|
// against it.
|
2022-02-04 01:07:42 +08:00
|
|
|
ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
|
2012-06-21 06:52:26 +08:00
|
|
|
Intfs.append(IVR.begin(), IVR.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Evict them second. This will invalidate the queries.
|
2022-02-04 01:07:42 +08:00
|
|
|
for (const LiveInterval *Intf : Intfs) {
|
2012-06-21 06:52:26 +08:00
|
|
|
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
|
2020-09-16 05:54:38 +08:00
|
|
|
if (!VRM->hasPhys(Intf->reg()))
|
2012-06-21 06:52:26 +08:00
|
|
|
continue;
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
2020-09-16 05:54:38 +08:00
|
|
|
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
2012-06-21 06:52:26 +08:00
|
|
|
Matrix->unassign(*Intf);
|
2021-12-01 08:30:16 +08:00
|
|
|
assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
|
2012-06-21 06:52:26 +08:00
|
|
|
VirtReg.isSpillable() < Intf->isSpillable()) &&
|
|
|
|
"Cannot decrease cascade number, illegal eviction");
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setCascade(Intf->reg(), Cascade);
|
2012-06-21 06:52:26 +08:00
|
|
|
++NumEvicted;
|
2020-09-16 05:54:38 +08:00
|
|
|
NewVRegs.push_back(Intf->reg());
|
2011-07-09 04:46:18 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-15 01:38:17 +08:00
|
|
|
/// Returns true if the given \p PhysReg is a callee saved register and has not
|
|
|
|
/// been used for allocation yet.
|
2021-12-14 14:49:57 +08:00
|
|
|
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
|
2020-06-30 23:57:24 +08:00
|
|
|
MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
|
|
|
|
if (!CSR)
|
2015-07-15 01:38:17 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return !Matrix->isPhysRegUsed(PhysReg);
|
|
|
|
}
|
|
|
|
|
2021-12-21 12:03:40 +08:00
|
|
|
/// Compute how many leading entries of \p Order need to be examined when
/// looking for a register whose cost is below \p CostPerUseLimit.
///
/// \param VirtReg         Live interval whose register class is consulted.
/// \param Order           Allocation order being scanned.
/// \param CostPerUseLimit Exclusive upper bound on the acceptable cost.
/// \returns None when the minimum cost reported for VirtReg's register class
/// is already at or above \p CostPerUseLimit; otherwise the number of entries
/// at the front of \p Order to consider.
Optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  // A limit of uint8_t(~0u) or more skips the cost filtering entirely.
  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      // Widen before printing: raw_ostream streams a uint8_t as a single
      // character rather than as a number.
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << static_cast<unsigned>(MinCost)
                        << ", no cheaper registers to be found.\n");
      return None;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}
|
|
|
|
|
|
|
|
bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
|
|
|
|
MCRegister PhysReg) const {
|
|
|
|
if (RegCosts[PhysReg] >= CostPerUseLimit)
|
|
|
|
return false;
|
|
|
|
// The first use of a callee-saved register in a function has cost 1.
|
|
|
|
// Don't start using a CSR when the CostPerUseLimit is low.
|
|
|
|
if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
|
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
|
|
|
|
<< printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
|
|
|
|
<< '\n');
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-11-16 05:27:00 +08:00
|
|
|
/// tryEvict - Try to evict all interferences for a physreg.
|
|
|
|
/// @param VirtReg Currently unassigned virtual register.
|
|
|
|
/// @param Order Physregs to try.
|
|
|
|
/// @return Physreg to assign VirtReg, or 0.
|
2022-02-04 01:07:42 +08:00
|
|
|
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
|
|
|
|
AllocationOrder &Order,
|
2021-11-16 05:27:00 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
|
|
|
uint8_t CostPerUseLimit,
|
|
|
|
const SmallVirtRegSet &FixedRegisters) {
|
|
|
|
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
|
|
|
|
TimePassesIsEnabled);
|
2011-02-09 09:14:03 +08:00
|
|
|
|
2021-12-14 14:49:57 +08:00
|
|
|
MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
|
|
|
|
VirtReg, Order, CostPerUseLimit, FixedRegisters);
|
2021-02-27 01:54:20 +08:00
|
|
|
if (BestPhys.isValid())
|
|
|
|
evictInterference(VirtReg, BestPhys, NewVRegs);
|
2011-02-23 08:29:52 +08:00
|
|
|
return BestPhys;
|
2010-12-10 02:15:21 +08:00
|
|
|
}
|
|
|
|
|
2011-01-19 05:13:27 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Region Splitting
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2011-04-07 05:32:38 +08:00
|
|
|
/// addSplitConstraints - Fill out the SplitConstraints vector based on the
|
|
|
|
/// interference pattern in Physreg and its aliases. Add the constraints to
|
|
|
|
/// SpillPlacement and return the static cost of this split in Cost, assuming
|
|
|
|
/// that all preferences in SplitConstraints are met.
|
2011-04-09 10:59:09 +08:00
|
|
|
/// Return false if there are no bundles with positive bias.
|
|
|
|
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency &Cost) {
|
2011-04-06 11:57:00 +08:00
|
|
|
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
|
2011-04-02 14:03:38 +08:00
|
|
|
|
2011-01-19 05:13:27 +08:00
|
|
|
// Reset interference dependent info.
|
2011-04-06 11:57:00 +08:00
|
|
|
SplitConstraints.resize(UseBlocks.size());
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency StaticCost = 0;
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
|
|
|
|
const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
|
|
|
|
SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
|
2011-03-05 09:10:31 +08:00
|
|
|
|
2011-02-10 06:50:26 +08:00
|
|
|
BC.Number = BI.MBB->getNumber();
|
2011-04-02 14:03:38 +08:00
|
|
|
Intf.moveToBlock(BC.Number);
|
2011-04-06 11:57:00 +08:00
|
|
|
BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
|
2018-12-14 22:07:57 +08:00
|
|
|
BC.Exit = (BI.LiveOut &&
|
|
|
|
!LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
|
|
|
|
? SpillPlacement::PrefReg
|
|
|
|
: SpillPlacement::DontCare;
|
2013-05-15 15:36:59 +08:00
|
|
|
BC.ChangesValue = BI.FirstDef.isValid();
|
2011-01-19 05:13:27 +08:00
|
|
|
|
2011-04-02 14:03:38 +08:00
|
|
|
if (!Intf.hasInterference())
|
|
|
|
continue;
|
|
|
|
|
2011-03-05 09:10:31 +08:00
|
|
|
// Number of spill code instructions to insert.
|
|
|
|
unsigned Ins = 0;
|
|
|
|
|
|
|
|
// Interference for the live-in value.
|
2011-04-02 14:03:38 +08:00
|
|
|
if (BI.LiveIn) {
|
2016-02-19 06:09:30 +08:00
|
|
|
if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
|
|
|
|
BC.Entry = SpillPlacement::MustSpill;
|
2011-03-05 09:10:31 +08:00
|
|
|
++Ins;
|
2016-02-19 06:09:30 +08:00
|
|
|
} else if (Intf.first() < BI.FirstInstr) {
|
|
|
|
BC.Entry = SpillPlacement::PrefSpill;
|
|
|
|
++Ins;
|
|
|
|
} else if (Intf.first() < BI.LastInstr) {
|
|
|
|
++Ins;
|
|
|
|
}
|
2018-09-26 02:37:38 +08:00
|
|
|
|
|
|
|
// Abort if the spill cannot be inserted at the MBB' start
|
|
|
|
if (((BC.Entry == SpillPlacement::MustSpill) ||
|
|
|
|
(BC.Entry == SpillPlacement::PrefSpill)) &&
|
|
|
|
SlotIndex::isEarlierInstr(BI.FirstInstr,
|
|
|
|
SA->getFirstSplitPoint(BC.Number)))
|
|
|
|
return false;
|
2011-02-09 07:02:58 +08:00
|
|
|
}
|
2011-01-19 05:13:27 +08:00
|
|
|
|
2011-03-05 09:10:31 +08:00
|
|
|
// Interference for the live-out value.
|
2011-04-02 14:03:38 +08:00
|
|
|
if (BI.LiveOut) {
|
2016-02-19 06:09:30 +08:00
|
|
|
if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
|
|
|
|
BC.Exit = SpillPlacement::MustSpill;
|
|
|
|
++Ins;
|
|
|
|
} else if (Intf.last() > BI.LastInstr) {
|
|
|
|
BC.Exit = SpillPlacement::PrefSpill;
|
2011-03-05 09:10:31 +08:00
|
|
|
++Ins;
|
2016-02-19 06:09:30 +08:00
|
|
|
} else if (Intf.last() > BI.FirstInstr) {
|
|
|
|
++Ins;
|
|
|
|
}
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
|
|
|
|
2011-03-05 09:10:31 +08:00
|
|
|
// Accumulate the total frequency of inserted spill code.
|
2013-07-17 02:26:18 +08:00
|
|
|
while (Ins--)
|
|
|
|
StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
2011-04-09 10:59:09 +08:00
|
|
|
Cost = StaticCost;
|
2011-04-06 11:57:00 +08:00
|
|
|
|
2011-04-07 05:32:38 +08:00
|
|
|
// Add constraints for use-blocks. Note that these are the only constraints
|
|
|
|
// that may add a positive bias, it is downhill from here.
|
|
|
|
SpillPlacer->addConstraints(SplitConstraints);
|
2011-04-09 10:59:09 +08:00
|
|
|
return SpillPlacer->scanActiveBundles();
|
|
|
|
}
|
2011-04-06 11:57:00 +08:00
|
|
|
|
2011-04-09 10:59:09 +08:00
|
|
|
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
|
|
|
|
/// live-through blocks in Blocks.
|
2018-09-26 02:37:38 +08:00
|
|
|
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
|
2011-04-09 10:59:09 +08:00
|
|
|
ArrayRef<unsigned> Blocks) {
|
2011-04-07 05:32:38 +08:00
|
|
|
const unsigned GroupSize = 8;
|
|
|
|
SpillPlacement::BlockConstraint BCS[GroupSize];
|
2011-04-09 10:59:09 +08:00
|
|
|
unsigned TBS[GroupSize];
|
|
|
|
unsigned B = 0, T = 0;
|
2011-04-06 11:57:00 +08:00
|
|
|
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned Number : Blocks) {
|
2011-04-07 05:32:38 +08:00
|
|
|
Intf.moveToBlock(Number);
|
|
|
|
|
2011-04-08 01:27:46 +08:00
|
|
|
if (!Intf.hasInterference()) {
|
2011-04-09 10:59:09 +08:00
|
|
|
assert(T < GroupSize && "Array overflow");
|
|
|
|
TBS[T] = Number;
|
|
|
|
if (++T == GroupSize) {
|
2011-07-18 20:00:32 +08:00
|
|
|
SpillPlacer->addLinks(makeArrayRef(TBS, T));
|
2011-04-09 10:59:09 +08:00
|
|
|
T = 0;
|
|
|
|
}
|
2011-04-08 01:27:46 +08:00
|
|
|
continue;
|
2011-04-07 05:32:38 +08:00
|
|
|
}
|
|
|
|
|
2011-04-09 10:59:09 +08:00
|
|
|
assert(B < GroupSize && "Array overflow");
|
|
|
|
BCS[B].Number = Number;
|
|
|
|
|
2018-09-26 02:37:38 +08:00
|
|
|
// Abort if the spill cannot be inserted at the MBB' start
|
|
|
|
MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
|
2021-04-28 08:16:29 +08:00
|
|
|
auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
|
|
|
|
if (FirstNonDebugInstr != MBB->end() &&
|
|
|
|
SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
|
|
|
|
SA->getFirstSplitPoint(Number)))
|
2018-09-26 02:37:38 +08:00
|
|
|
return false;
|
2011-04-08 01:27:46 +08:00
|
|
|
// Interference for the live-in value.
|
|
|
|
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
|
|
|
|
BCS[B].Entry = SpillPlacement::MustSpill;
|
|
|
|
else
|
|
|
|
BCS[B].Entry = SpillPlacement::PrefSpill;
|
|
|
|
|
|
|
|
// Interference for the live-out value.
|
|
|
|
if (Intf.last() >= SA->getLastSplitPoint(Number))
|
|
|
|
BCS[B].Exit = SpillPlacement::MustSpill;
|
|
|
|
else
|
|
|
|
BCS[B].Exit = SpillPlacement::PrefSpill;
|
|
|
|
|
2011-04-07 05:32:38 +08:00
|
|
|
if (++B == GroupSize) {
|
2014-08-27 13:25:25 +08:00
|
|
|
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
|
2011-04-07 05:32:38 +08:00
|
|
|
B = 0;
|
|
|
|
}
|
2011-04-06 11:57:00 +08:00
|
|
|
}
|
|
|
|
|
2014-08-27 13:25:25 +08:00
|
|
|
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
|
2011-07-18 20:00:32 +08:00
|
|
|
SpillPlacer->addLinks(makeArrayRef(TBS, T));
|
2018-09-26 02:37:38 +08:00
|
|
|
return true;
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
|
|
|
|
2018-09-26 02:37:38 +08:00
|
|
|
/// growRegion - Repeatedly pull the through blocks attached to SpillPlacer's
/// recently positive bundles into Cand.ActiveBlocks, add them to SpillPlacer,
/// and iterate until no new blocks turn up.
/// Return false if the complexity budget runs out or if the through
/// constraints for a batch of blocks cannot be added.
bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Through blocks that SpillPlacer has not been told about yet.
  BitVector Pending = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned NumSubmitted = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long RemainingBudget = GrowRegionComplexityBudget;
  for (;;) {
    // Find new through blocks in the periphery of PrefRegBundles.
    ArrayRef<unsigned> RecentBundles = SpillPlacer->getRecentPositive();
    for (unsigned Bundle : RecentBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Connected = Bundles->getBlocks(Bundle);

      // Limit compilation time by bailing out after we use all our budget.
      if (Connected.size() >= RemainingBudget)
        return false;
      RemainingBudget -= Connected.size();

      for (unsigned Block : Connected) {
        if (Pending.test(Block)) {
          Pending.reset(Block);
          // This is a new through block. Add it to SpillPlacer later.
          ActiveBlocks.push_back(Block);
#ifndef NDEBUG
          ++Visited;
#endif
        }
      }
    }

    // Stop once a round discovers no new blocks.
    if (ActiveBlocks.size() == NumSubmitted)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto FreshBlocks = makeArrayRef(ActiveBlocks).slice(NumSubmitted);
    if (!Cand.PhysReg) {
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(FreshBlocks, /* Strong= */ true);
    } else if (!addThroughConstraints(Cand.Intf, FreshBlocks)) {
      return false;
    }
    NumSubmitted = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}
|
2011-03-05 09:10:31 +08:00
|
|
|
|
2011-07-23 11:41:57 +08:00
|
|
|
/// calcCompactRegion - Compute the set of edge bundles that should be live
|
|
|
|
/// when splitting the current live range into compact regions. Compact
|
|
|
|
/// regions can be computed without looking at interference. They are the
|
|
|
|
/// regions formed by removing all the live-through blocks from the live range.
|
|
|
|
///
|
|
|
|
/// Returns false if the current live range is already compact, or if the
|
|
|
|
/// compact regions would form single block regions anyway.
|
|
|
|
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
|
|
|
|
// Without any through blocks, the live range is already compact.
|
|
|
|
if (!SA->getNumThroughBlocks())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Compact regions don't correspond to any physreg.
|
2020-10-07 05:38:41 +08:00
|
|
|
Cand.reset(IntfCache, MCRegister::NoRegister);
|
2011-07-23 11:41:57 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Compact region bundles");
|
2011-07-23 11:41:57 +08:00
|
|
|
|
|
|
|
// Use the spill placer to determine the live bundles. GrowRegion pretends
|
|
|
|
// that all the through blocks have interference when PhysReg is unset.
|
|
|
|
SpillPlacer->prepare(Cand.LiveBundles);
|
|
|
|
|
|
|
|
// The static split cost will be zero since Cand.Intf reports no interference.
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency Cost;
|
2011-07-23 11:41:57 +08:00
|
|
|
if (!addSplitConstraints(Cand.Intf, Cost)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ", none.\n");
|
2011-07-23 11:41:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-09-26 02:37:38 +08:00
|
|
|
if (!growRegion(Cand)) {
|
|
|
|
LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-07-23 11:41:57 +08:00
|
|
|
SpillPlacer->finish();
|
|
|
|
|
|
|
|
if (!Cand.LiveBundles.any()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ", none.\n");
|
2011-07-23 11:41:57 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
2020-09-22 07:27:09 +08:00
|
|
|
for (int I : Cand.LiveBundles.set_bits())
|
|
|
|
dbgs() << " EB#" << I;
|
2011-07-23 11:41:57 +08:00
|
|
|
dbgs() << ".\n";
|
|
|
|
});
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-04-23 06:47:40 +08:00
|
|
|
/// calcSpillCost - Compute how expensive it would be to split the live range in
|
|
|
|
/// SA around all use blocks instead of forming bundle regions.
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency RAGreedy::calcSpillCost() {
|
|
|
|
BlockFrequency Cost = 0;
|
2011-04-23 06:47:40 +08:00
|
|
|
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
|
2020-09-22 07:27:09 +08:00
|
|
|
for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
|
2011-04-23 06:47:40 +08:00
|
|
|
unsigned Number = BI.MBB->getNumber();
|
|
|
|
// We normally only need one spill instruction - a load or a store.
|
|
|
|
Cost += SpillPlacer->getBlockFrequency(Number);
|
|
|
|
|
|
|
|
// Unless the value is redefined in the block.
|
2011-08-03 07:04:08 +08:00
|
|
|
if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
|
|
|
|
Cost += SpillPlacer->getBlockFrequency(Number);
|
2011-04-23 06:47:40 +08:00
|
|
|
}
|
|
|
|
return Cost;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if splitting Evictee will create a local split interval in
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// basic block number BBNumber that may cause a bad eviction chain. This is
|
|
|
|
/// intended to prevent bad eviction sequences like:
|
|
|
|
/// movl %ebp, 8(%esp) # 4-byte Spill
|
|
|
|
/// movl %ecx, %ebp
|
|
|
|
/// movl %ebx, %ecx
|
|
|
|
/// movl %edi, %ebx
|
|
|
|
/// movl %edx, %edi
|
|
|
|
/// cltd
|
|
|
|
/// idivl %esi
|
|
|
|
/// movl %edi, %edx
|
|
|
|
/// movl %ebx, %edi
|
|
|
|
/// movl %ecx, %ebx
|
|
|
|
/// movl %ebp, %ecx
|
|
|
|
/// movl 16(%esp), %ebp # 4 - byte Reload
|
|
|
|
///
|
|
|
|
/// Such sequences are created in 2 scenarios:
|
|
|
|
///
|
|
|
|
/// Scenario #1:
|
2017-11-30 20:12:19 +08:00
|
|
|
/// %0 is evicted from physreg0 by %1.
|
|
|
|
/// Evictee %0 is intended for region splitting with split candidate
|
|
|
|
/// physreg0 (the reg %0 was evicted from).
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// Region splitting creates a local interval because of interference with the
|
2018-06-20 13:29:26 +08:00
|
|
|
/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// and "by stack" intervals and local interval created when interference
|
|
|
|
/// occurs).
|
2017-11-30 20:12:19 +08:00
|
|
|
/// One of the split intervals ends up evicting %2 from physreg1.
|
|
|
|
/// Evictee %2 is intended for region splitting with split candidate
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// physreg1.
|
2017-11-30 20:12:19 +08:00
|
|
|
/// One of the split intervals ends up evicting %3 from physreg2, etc.
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
///
|
|
|
|
/// Scenario #2
|
2017-11-30 20:12:19 +08:00
|
|
|
/// %0 is evicted from physreg0 by %1.
|
|
|
|
/// %2 is evicted from physreg2 by %3 etc.
|
|
|
|
/// Evictee %0 is intended for region splitting with split candidate
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// physreg1.
|
|
|
|
/// Region splitting creates a local interval because of interference with the
|
2017-11-30 20:12:19 +08:00
|
|
|
/// evictor %1.
|
|
|
|
/// One of the split intervals ends up evicting back original evictor %1
|
|
|
|
/// from physreg0 (the reg %0 was evicted from).
|
|
|
|
/// Another evictee %2 is intended for region splitting with split candidate
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// physreg1.
|
2017-11-30 20:12:19 +08:00
|
|
|
/// One of the split intervals ends up evicting %3 from physreg2, etc.
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
///
|
|
|
|
/// \param Evictee The register considered to be split.
|
|
|
|
/// \param Cand The split candidate that determines the physical register
|
|
|
|
/// we are splitting for and the interferences.
|
|
|
|
/// \param BBNumber The number of a BB for which the region split process will
|
|
|
|
/// create a local split interval.
|
2018-01-31 21:31:08 +08:00
|
|
|
/// \param Order The physical registers that may get evicted by a split
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
/// artifact of Evictee.
|
|
|
|
/// \return True if splitting Evictee may cause a bad eviction chain, false
|
|
|
|
/// otherwise.
|
2020-10-23 01:30:30 +08:00
|
|
|
bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
GlobalSplitCandidate &Cand,
|
|
|
|
unsigned BBNumber,
|
|
|
|
const AllocationOrder &Order) {
|
|
|
|
EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
|
|
|
|
unsigned Evictor = VregEvictorInfo.first;
|
2020-10-23 01:30:30 +08:00
|
|
|
MCRegister PhysReg = VregEvictorInfo.second;
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
|
|
|
// No actual evictor.
|
|
|
|
if (!Evictor || !PhysReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
float MaxWeight = 0;
|
2020-11-03 12:07:58 +08:00
|
|
|
MCRegister FutureEvictedPhysReg =
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
|
|
|
|
Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
|
|
|
|
|
2018-01-31 21:31:08 +08:00
|
|
|
// The bad eviction chain occurs when either the split candidate is the
|
|
|
|
// evicting reg or one of the split artifact will evict the evicting reg.
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Cand.Intf.moveToBlock(BBNumber);
|
|
|
|
|
|
|
|
// Check to see if the Evictor contains interference (with Evictee) in the
|
|
|
|
// given BB. If so, this interference caused the eviction of Evictee from
|
|
|
|
// PhysReg. This suggest that we will create a local interval during the
|
|
|
|
// region split to avoid this interference This local interval may cause a bad
|
|
|
|
// eviction chain.
|
|
|
|
if (!LIS->hasInterval(Evictor))
|
|
|
|
return false;
|
|
|
|
LiveInterval &EvictorLI = LIS->getInterval(Evictor);
|
|
|
|
if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Now, check to see if the local interval we will create is going to be
|
|
|
|
// expensive enough to evict somebody If so, this may cause a bad eviction
|
|
|
|
// chain.
|
|
|
|
float splitArtifactWeight =
|
2020-09-30 00:09:25 +08:00
|
|
|
VRAI->futureWeight(LIS->getInterval(Evictee),
|
|
|
|
Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-01-19 05:13:27 +08:00
|
|
|
/// calcGlobalSplitCost - Return the global split cost of following the split
|
|
|
|
/// pattern in LiveBundles. This cost should be added to the local cost of the
|
2011-03-05 09:10:31 +08:00
|
|
|
/// interference pattern in SplitConstraints.
|
2011-01-19 05:13:27 +08:00
|
|
|
///
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
|
2022-03-08 00:23:05 +08:00
|
|
|
const AllocationOrder &Order) {
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency GlobalCost = 0;
|
2011-04-13 05:30:53 +08:00
|
|
|
const BitVector &LiveBundles = Cand.LiveBundles;
|
2011-04-06 11:57:00 +08:00
|
|
|
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
|
|
|
|
const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
|
|
|
|
SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
|
2017-06-07 06:22:41 +08:00
|
|
|
bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
|
|
|
|
bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
|
2011-03-05 11:28:51 +08:00
|
|
|
unsigned Ins = 0;
|
|
|
|
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
Cand.Intf.moveToBlock(BC.Number);
|
|
|
|
|
2011-04-06 11:57:00 +08:00
|
|
|
if (BI.LiveIn)
|
|
|
|
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
|
|
|
|
if (BI.LiveOut)
|
|
|
|
Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
|
2013-07-17 02:26:18 +08:00
|
|
|
while (Ins--)
|
|
|
|
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
2011-04-06 11:57:00 +08:00
|
|
|
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned Number : Cand.ActiveBlocks) {
|
2017-06-07 06:22:41 +08:00
|
|
|
bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
|
|
|
|
bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
|
2011-04-07 05:32:41 +08:00
|
|
|
if (!RegIn && !RegOut)
|
|
|
|
continue;
|
|
|
|
if (RegIn && RegOut) {
|
|
|
|
// We need double spill code if this block has interference.
|
2011-07-14 08:17:10 +08:00
|
|
|
Cand.Intf.moveToBlock(Number);
|
2013-07-17 02:26:18 +08:00
|
|
|
if (Cand.Intf.hasInterference()) {
|
|
|
|
GlobalCost += SpillPlacer->getBlockFrequency(Number);
|
|
|
|
GlobalCost += SpillPlacer->getBlockFrequency(Number);
|
|
|
|
}
|
2011-04-07 05:32:41 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// live-in / stack-out or stack-in live-out.
|
|
|
|
GlobalCost += SpillPlacer->getBlockFrequency(Number);
|
2011-04-06 11:57:00 +08:00
|
|
|
}
|
2011-01-19 05:13:27 +08:00
|
|
|
return GlobalCost;
|
|
|
|
}
|
|
|
|
|
2011-07-27 07:41:46 +08:00
|
|
|
/// splitAroundRegion - Split the current live range around the regions
|
|
|
|
/// determined by BundleCand and GlobalCand.
|
2011-01-20 06:11:48 +08:00
|
|
|
///
|
2011-07-27 07:41:46 +08:00
|
|
|
/// Before calling this function, GlobalCand and BundleCand must be initialized
|
|
|
|
/// so each bundle is assigned to a valid candidate, or NoCand for the
|
|
|
|
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
|
|
|
|
/// objects must be initialized for the current live range, and intervals
|
|
|
|
/// created for the used candidates.
|
2011-01-20 06:11:48 +08:00
|
|
|
///
|
2011-07-27 07:41:46 +08:00
|
|
|
/// @param LREdit The LiveRangeEdit object handling the current split.
|
|
|
|
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
|
|
|
|
/// must appear in this list.
|
|
|
|
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
|
|
|
|
ArrayRef<unsigned> UsedCands) {
|
|
|
|
// These are the intervals created for new global ranges. We may create more
|
|
|
|
// intervals for local ranges.
|
|
|
|
const unsigned NumGlobalIntvs = LREdit.size();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
|
|
|
|
<< " globals.\n");
|
2011-07-27 07:41:46 +08:00
|
|
|
assert(NumGlobalIntvs && "No global intervals configured");
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-08-06 06:20:45 +08:00
|
|
|
// Isolate even single instructions when dealing with a proper sub-class.
|
2011-08-07 02:20:24 +08:00
|
|
|
// That guarantees register class inflation for the stack interval because it
|
2011-08-06 06:20:45 +08:00
|
|
|
// is all copies.
|
2020-11-03 12:07:58 +08:00
|
|
|
Register Reg = SA->getParent().reg();
|
2011-08-06 06:20:45 +08:00
|
|
|
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
|
|
|
|
|
Reapply r134047 now that the world is ready for it.
This patch will sometimes choose live range split points next to
interference instead of always splitting next to a register point. That
means spill code can now appear almost anywhere, and it was necessary
to fix code that didn't expect that.
The difficult places were:
- Between a CALL returning a value on the x87 stack and the
corresponding FpPOP_RETVAL (was FpGET_ST0). Probably also near x87
inline assembly, but that didn't actually show up in testing.
- Between a CALL popping arguments off the stack and the corresponding
ADJCALLSTACKUP.
Both are fixed now. The only place spill code can't appear is after
terminators, see SplitAnalysis::getLastSplitPoint.
Original commit message:
Rewrite RAGreedy::splitAroundRegion, now with cool ASCII art.
This function has to deal with a lot of special cases, and the old
version got it wrong sometimes. In particular, it would sometimes leave
multiple uses in the stack interval in a single block. That causes bad
code with multiple reloads in the same basic block.
The new version handles block entry and exit in a single pass. It first
eliminates all the easy cases, and then goes on to create a local
interval for the blocks with difficult interference. Previously, we
would only create the local interval for completely isolated blocks.
It can happen that the stack interval becomes completely empty because
we could allocate a register in all edge bundles, and the new local
intervals deal with the interference. The empty stack interval is
harmless, but we need to remove a SplitKit assertion that checks for
empty intervals.
llvm-svn: 134125
2011-06-30 09:30:39 +08:00
|
|
|
// First handle all the blocks with uses.
|
2011-04-06 11:57:00 +08:00
|
|
|
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
|
2020-09-22 07:27:09 +08:00
|
|
|
for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
|
2011-07-27 07:41:46 +08:00
|
|
|
unsigned Number = BI.MBB->getNumber();
|
|
|
|
unsigned IntvIn = 0, IntvOut = 0;
|
|
|
|
SlotIndex IntfIn, IntfOut;
|
|
|
|
if (BI.LiveIn) {
|
2017-06-07 06:22:41 +08:00
|
|
|
unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
|
2011-07-27 07:41:46 +08:00
|
|
|
if (CandIn != NoCand) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[CandIn];
|
|
|
|
IntvIn = Cand.IntvIdx;
|
|
|
|
Cand.Intf.moveToBlock(Number);
|
|
|
|
IntfIn = Cand.Intf.first();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (BI.LiveOut) {
|
2017-06-07 06:22:41 +08:00
|
|
|
unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
|
2011-07-27 07:41:46 +08:00
|
|
|
if (CandOut != NoCand) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[CandOut];
|
|
|
|
IntvOut = Cand.IntvIdx;
|
|
|
|
Cand.Intf.moveToBlock(Number);
|
|
|
|
IntfOut = Cand.Intf.last();
|
|
|
|
}
|
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-04-13 03:32:53 +08:00
|
|
|
// Create separate intervals for isolated blocks with multiple uses.
|
2011-07-27 07:41:46 +08:00
|
|
|
if (!IntvIn && !IntvOut) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
|
2011-08-06 06:20:45 +08:00
|
|
|
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
|
Reapply r134047 now that the world is ready for it.
This patch will sometimes choose live range split points next to
interference instead of always splitting next to a register point. That
means spill code can now appear almost anywhere, and it was necessary
to fix code that didn't expect that.
The difficult places were:
- Between a CALL returning a value on the x87 stack and the
corresponding FpPOP_RETVAL (was FpGET_ST0). Probably also near x87
inline assembly, but that didn't actually show up in testing.
- Between a CALL popping arguments off the stack and the corresponding
ADJCALLSTACKUP.
Both are fixed now. The only place spill code can't appear is after
terminators, see SplitAnalysis::getLastSplitPoint.
Original commit message:
Rewrite RAGreedy::splitAroundRegion, now with cool ASCII art.
This function has to deal with a lot of special cases, and the old
version got it wrong sometimes. In particular, it would sometimes leave
multiple uses in the stack interval in a single block. That causes bad
code with multiple reloads in the same basic block.
The new version handles block entry and exit in a single pass. It first
eliminates all the easy cases, and then goes on to create a local
interval for the blocks with difficult interference. Previously, we
would only create the local interval for completely isolated blocks.
It can happen that the stack interval becomes completely empty because
we could allocate a register in all edge bundles, and the new local
intervals deal with the interference. The empty stack interval is
harmless, but we need to remove a SplitKit assertion that checks for
empty intervals.
llvm-svn: 134125
2011-06-30 09:30:39 +08:00
|
|
|
SE->splitSingleBlock(BI);
|
2011-04-13 03:32:53 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-07-27 07:41:46 +08:00
|
|
|
if (IntvIn && IntvOut)
|
|
|
|
SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
|
|
|
|
else if (IntvIn)
|
|
|
|
SE->splitRegInBlock(BI, IntvIn, IntfIn);
|
2011-07-16 05:47:57 +08:00
|
|
|
else
|
2011-07-27 07:41:46 +08:00
|
|
|
SE->splitRegOutBlock(BI, IntvOut, IntfOut);
|
2011-01-20 06:11:48 +08:00
|
|
|
}
|
|
|
|
|
2011-07-27 07:41:46 +08:00
|
|
|
// Handle live-through blocks. The relevant live-through blocks are stored in
|
|
|
|
// the ActiveBlocks list with each candidate. We need to filter out
|
|
|
|
// duplicates.
|
|
|
|
BitVector Todo = SA->getThroughBlocks();
|
2021-12-05 00:48:04 +08:00
|
|
|
for (unsigned UsedCand : UsedCands) {
|
|
|
|
ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned Number : Blocks) {
|
2011-07-27 07:41:46 +08:00
|
|
|
if (!Todo.test(Number))
|
|
|
|
continue;
|
|
|
|
Todo.reset(Number);
|
|
|
|
|
|
|
|
unsigned IntvIn = 0, IntvOut = 0;
|
|
|
|
SlotIndex IntfIn, IntfOut;
|
|
|
|
|
2017-06-07 06:22:41 +08:00
|
|
|
unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
|
2011-07-27 07:41:46 +08:00
|
|
|
if (CandIn != NoCand) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[CandIn];
|
|
|
|
IntvIn = Cand.IntvIdx;
|
|
|
|
Cand.Intf.moveToBlock(Number);
|
|
|
|
IntfIn = Cand.Intf.first();
|
|
|
|
}
|
|
|
|
|
2017-06-07 06:22:41 +08:00
|
|
|
unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
|
2011-07-27 07:41:46 +08:00
|
|
|
if (CandOut != NoCand) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[CandOut];
|
|
|
|
IntvOut = Cand.IntvIdx;
|
|
|
|
Cand.Intf.moveToBlock(Number);
|
|
|
|
IntfOut = Cand.Intf.last();
|
|
|
|
}
|
|
|
|
if (!IntvIn && !IntvOut)
|
|
|
|
continue;
|
|
|
|
SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
|
|
|
|
}
|
2011-04-06 11:57:00 +08:00
|
|
|
}
|
|
|
|
|
2011-02-18 06:53:48 +08:00
|
|
|
++NumGlobalSplits;
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-04-22 02:38:15 +08:00
|
|
|
SmallVector<unsigned, 8> IntvMap;
|
|
|
|
SE->finish(&IntvMap);
|
2013-08-15 07:50:04 +08:00
|
|
|
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
|
2011-05-07 02:00:02 +08:00
|
|
|
|
2011-05-28 10:32:57 +08:00
|
|
|
unsigned OrigBlocks = SA->getNumLiveBlocks();
|
2011-04-22 02:38:15 +08:00
|
|
|
|
|
|
|
// Sort out the new intervals created by splitting. We get four kinds:
|
|
|
|
// - Remainder intervals should not be split again.
|
|
|
|
// - Candidate intervals can be assigned to Cand.PhysReg.
|
|
|
|
// - Block-local splits are candidates for local splitting.
|
|
|
|
// - DCE leftovers should go back on the queue.
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
|
2021-11-30 09:18:29 +08:00
|
|
|
const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
|
2011-04-22 02:38:15 +08:00
|
|
|
|
|
|
|
// Ignore old intervals from DCE.
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
|
2011-04-22 02:38:15 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// Remainder interval. Don't try splitting again, spill if it doesn't
|
|
|
|
// allocate.
|
2020-09-22 07:27:09 +08:00
|
|
|
if (IntvMap[I] == 0) {
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(Reg, RS_Spill);
|
2011-04-22 02:38:15 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-07-27 07:41:46 +08:00
|
|
|
// Global intervals. Allow repeated splitting as long as the number of live
|
|
|
|
// blocks is strictly decreasing.
|
2020-09-22 07:27:09 +08:00
|
|
|
if (IntvMap[I] < NumGlobalIntvs) {
|
2011-07-02 09:37:09 +08:00
|
|
|
if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
|
|
|
|
<< " blocks as original.\n");
|
2011-04-27 06:33:12 +08:00
|
|
|
// Don't allow repeated splitting as a safe guard against looping.
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(Reg, RS_Split2);
|
2011-04-27 06:33:12 +08:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Other intervals are treated as new. This includes local intervals created
|
|
|
|
// for blocks with multiple uses, and anything created by DCE.
|
2011-04-22 02:38:15 +08:00
|
|
|
}
|
|
|
|
|
2011-03-28 06:49:21 +08:00
|
|
|
if (VerifyEnabled)
|
2011-01-20 06:11:48 +08:00
|
|
|
MF->verify(this, "After splitting live range around region");
|
|
|
|
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Try to split VirtReg around a region.
///
/// Returns MCRegister::NoRegister when the target declines region splitting
/// for VirtReg, or when there is neither a compact region nor a candidate
/// cheaper than the spill cost. Otherwise the result of doRegionSplit() is
/// returned.
MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;

  const BlockFrequency SpillCost = calcSpillCost();

  // Can this live range be split around a compact region?
  const bool HasCompact = calcCompactRegion(GlobalCand.front());

  // Without a useful compact region the fallback is per-block splitting, so
  // a region candidate has to beat the spill cost.
  unsigned NumCands = 0;
  BlockFrequency BestCost = SpillCost;
  if (HasCompact) {
    // GlobalCand[0] is kept as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  const unsigned BestCand = calculateRegionSplitCost(
      VirtReg, Order, BestCost, NumCands, false /*IgnoreCSR*/);

  // Nothing to split around: let the caller fall back to single block
  // splitting.
  const bool FoundCandidate = BestCand != NoCand;
  if (!HasCompact && !FoundCandidate)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
|
2014-03-25 07:23:42 +08:00
|
|
|
AllocationOrder &Order,
|
|
|
|
BlockFrequency &BestCost,
|
2022-03-08 00:23:05 +08:00
|
|
|
unsigned &NumCands,
|
|
|
|
bool IgnoreCSR) {
|
2014-03-25 07:23:42 +08:00
|
|
|
unsigned BestCand = NoCand;
|
2020-09-24 12:58:45 +08:00
|
|
|
for (MCPhysReg PhysReg : Order) {
|
|
|
|
assert(PhysReg);
|
2021-12-14 14:49:57 +08:00
|
|
|
if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
|
2015-07-15 01:38:17 +08:00
|
|
|
continue;
|
2014-03-25 08:16:25 +08:00
|
|
|
|
2011-07-14 13:35:11 +08:00
|
|
|
// Discard bad candidates before we run out of interference cache cursors.
|
|
|
|
// This will only affect register classes with a lot of registers (>32).
|
|
|
|
if (NumCands == IntfCache.getMaxCursors()) {
|
|
|
|
unsigned WorstCount = ~0u;
|
|
|
|
unsigned Worst = 0;
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
|
|
|
|
if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
|
2011-07-14 13:35:11 +08:00
|
|
|
continue;
|
2020-09-22 07:27:09 +08:00
|
|
|
unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
|
2016-02-19 06:09:30 +08:00
|
|
|
if (Count < WorstCount) {
|
2020-09-22 07:27:09 +08:00
|
|
|
Worst = CandIndex;
|
2016-02-19 06:09:30 +08:00
|
|
|
WorstCount = Count;
|
|
|
|
}
|
2011-07-14 13:35:11 +08:00
|
|
|
}
|
|
|
|
--NumCands;
|
|
|
|
GlobalCand[Worst] = GlobalCand[NumCands];
|
2011-11-01 08:02:31 +08:00
|
|
|
if (BestCand == NumCands)
|
|
|
|
BestCand = Worst;
|
2011-07-14 13:35:11 +08:00
|
|
|
}
|
|
|
|
|
2011-07-14 08:17:10 +08:00
|
|
|
if (GlobalCand.size() <= NumCands)
|
|
|
|
GlobalCand.resize(NumCands+1);
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[NumCands];
|
|
|
|
Cand.reset(IntfCache, PhysReg);
|
2011-03-05 09:10:31 +08:00
|
|
|
|
2011-07-14 08:17:10 +08:00
|
|
|
SpillPlacer->prepare(Cand.LiveBundles);
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency Cost;
|
2011-07-14 08:17:10 +08:00
|
|
|
if (!addSplitConstraints(Cand.Intf, Cost)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
|
2011-04-07 05:32:38 +08:00
|
|
|
continue;
|
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
|
|
|
|
MBFI->printBlockFreq(dbgs(), Cost));
|
2011-04-23 06:47:40 +08:00
|
|
|
if (Cost >= BestCost) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
2011-04-23 06:47:40 +08:00
|
|
|
if (BestCand == NoCand)
|
|
|
|
dbgs() << " worse than no bundles\n";
|
|
|
|
else
|
|
|
|
dbgs() << " worse than "
|
2017-11-28 20:42:37 +08:00
|
|
|
<< printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
|
2011-04-23 06:47:40 +08:00
|
|
|
});
|
2011-01-19 05:13:27 +08:00
|
|
|
continue;
|
2011-03-05 11:28:51 +08:00
|
|
|
}
|
2018-09-26 02:37:38 +08:00
|
|
|
if (!growRegion(Cand)) {
|
|
|
|
LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
|
|
|
|
continue;
|
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-04-07 03:13:57 +08:00
|
|
|
SpillPlacer->finish();
|
|
|
|
|
2011-01-20 06:11:48 +08:00
|
|
|
// No live bundles, defer to splitSingleBlocks().
|
2011-07-14 08:17:10 +08:00
|
|
|
if (!Cand.LiveBundles.any()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " no bundles.\n");
|
2011-01-20 06:11:48 +08:00
|
|
|
continue;
|
2011-03-05 11:28:51 +08:00
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2022-03-08 00:23:05 +08:00
|
|
|
Cost += calcGlobalSplitCost(Cand, Order);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
|
|
|
dbgs() << ", total = ";
|
|
|
|
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
|
2020-09-22 07:27:09 +08:00
|
|
|
for (int I : Cand.LiveBundles.set_bits())
|
|
|
|
dbgs() << " EB#" << I;
|
2011-03-05 11:28:51 +08:00
|
|
|
dbgs() << ".\n";
|
|
|
|
});
|
2011-04-23 06:47:40 +08:00
|
|
|
if (Cost < BestCost) {
|
2011-07-14 08:17:10 +08:00
|
|
|
BestCand = NumCands;
|
2013-07-17 02:26:18 +08:00
|
|
|
BestCost = Cost;
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
2011-07-14 08:17:10 +08:00
|
|
|
++NumCands;
|
2011-01-19 05:13:27 +08:00
|
|
|
}
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
|
2014-03-25 07:23:42 +08:00
|
|
|
return BestCand;
|
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
|
2014-03-25 07:23:42 +08:00
|
|
|
bool HasCompact,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs) {
|
2014-03-25 07:23:42 +08:00
|
|
|
SmallVector<unsigned, 8> UsedCands;
|
2011-07-27 07:41:46 +08:00
|
|
|
// Prepare split editor.
|
2016-04-13 11:08:27 +08:00
|
|
|
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
|
Add an interface for SplitKit complement spill modes.
SplitKit always computes a complement live range to cover the places
where the original live range was live, but no explicit region has been
allocated.
Currently, the complement live range is created to be as small as
possible - it never overlaps any of the regions. This minimizes
register pressure, but if the complement is going to be spilled anyway,
that is not very important. The spiller will eliminate redundant
spills, and hoist others by making the spill slot live range overlap
some of the regions created by splitting. Stack slots are cheap.
This patch adds the interface to enable spill modes in SplitKit. In
spill mode, SplitKit will assume that the complement is going to spill,
so it will allow it to overlap regions in order to avoid back-copies.
By doing some of the spiller's work early, the complement live range
becomes simpler. In some cases, it can become much simpler because no
extra PHI-defs are required. This will speed up both splitting and
spilling.
This is only the interface to enable spill modes, no implementation yet.
llvm-svn: 139500
2011-09-13 00:49:21 +08:00
|
|
|
SE->reset(LREdit, SplitSpillMode);
|
2011-07-27 07:41:46 +08:00
|
|
|
|
|
|
|
// Assign all edge bundles to the preferred candidate, or NoCand.
|
|
|
|
BundleCand.assign(Bundles->getNumBundles(), NoCand);
|
|
|
|
|
|
|
|
// Assign bundles for the best candidate region.
|
|
|
|
if (BestCand != NoCand) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand[BestCand];
|
|
|
|
if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
|
|
|
|
UsedCands.push_back(BestCand);
|
|
|
|
Cand.IntvIdx = SE->openIntv();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
|
|
|
|
<< B << " bundles, intv " << Cand.IntvIdx << ".\n");
|
2011-08-04 07:07:27 +08:00
|
|
|
(void)B;
|
2011-07-27 07:41:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assign bundles for the compact region.
|
|
|
|
if (HasCompact) {
|
|
|
|
GlobalSplitCandidate &Cand = GlobalCand.front();
|
|
|
|
assert(!Cand.PhysReg && "Compact region has no physreg");
|
|
|
|
if (unsigned B = Cand.getBundles(BundleCand, 0)) {
|
|
|
|
UsedCands.push_back(0);
|
|
|
|
Cand.IntvIdx = SE->openIntv();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Split for compact region in " << B
|
|
|
|
<< " bundles, intv " << Cand.IntvIdx << ".\n");
|
2011-08-04 07:07:27 +08:00
|
|
|
(void)B;
|
2011-07-27 07:41:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
splitAroundRegion(LREdit, UsedCands);
|
2011-01-19 05:13:27 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-08-06 07:04:18 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Per-Block Splitting
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// tryBlockSplit - Split a global live range around every block with uses. This
|
|
|
|
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
|
|
|
|
/// they don't allocate.
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
|
|
|
|
AllocationOrder &Order,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs) {
|
2011-08-06 07:04:18 +08:00
|
|
|
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
|
2020-09-16 05:54:38 +08:00
|
|
|
Register Reg = VirtReg.reg();
|
2011-08-06 07:04:18 +08:00
|
|
|
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
|
2016-04-13 11:08:27 +08:00
|
|
|
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
|
Add an interface for SplitKit complement spill modes.
SplitKit always computes a complement live range to cover the places
where the original live range was live, but no explicit region has been
allocated.
Currently, the complement live range is created to be as small as
possible - it never overlaps any of the regions. This minimizes
register pressure, but if the complement is going to be spilled anyway,
that is not very important. The spiller will eliminate redundant
spills, and hoist others by making the spill slot live range overlap
some of the regions created by splitting. Stack slots are cheap.
This patch adds the interface to enable spill modes in SplitKit. In
spill mode, SplitKit will assume that the complement is going to spill,
so it will allow it to overlap regions in order to avoid back-copies.
By doing some of the spiller's work early, the complement live range
becomes simpler. In some cases, it can become much simpler because no
extra PHI-defs are required. This will speed up both splitting and
spilling.
This is only the interface to enable spill modes, no implementation yet.
llvm-svn: 139500
2011-09-13 00:49:21 +08:00
|
|
|
SE->reset(LREdit, SplitSpillMode);
|
2011-08-06 07:04:18 +08:00
|
|
|
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
|
2020-09-22 07:27:09 +08:00
|
|
|
for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
|
2011-08-06 07:04:18 +08:00
|
|
|
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
|
|
|
|
SE->splitSingleBlock(BI);
|
|
|
|
}
|
|
|
|
// No blocks were split.
|
|
|
|
if (LREdit.empty())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// We did split for some blocks.
|
2011-08-06 07:50:31 +08:00
|
|
|
SmallVector<unsigned, 8> IntvMap;
|
|
|
|
SE->finish(&IntvMap);
|
2011-08-06 07:10:40 +08:00
|
|
|
|
|
|
|
// Tell LiveDebugVariables about the new ranges.
|
2013-08-15 07:50:04 +08:00
|
|
|
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
|
2011-08-06 07:10:40 +08:00
|
|
|
|
2011-08-06 07:50:31 +08:00
|
|
|
// Sort out the new intervals created by splitting. The remainder interval
|
|
|
|
// goes straight to spilling, the new local ranges get to stay RS_New.
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
|
2021-11-30 09:18:29 +08:00
|
|
|
const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
|
|
|
|
ExtraInfo->setStage(LI, RS_Spill);
|
2011-08-06 07:50:31 +08:00
|
|
|
}
|
|
|
|
|
2011-08-06 07:04:18 +08:00
|
|
|
if (VerifyEnabled)
|
|
|
|
MF->verify(this, "After splitting live range around basic blocks");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-05-24 06:37:27 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Per-Instruction Splitting
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-01-03 06:47:22 +08:00
|
|
|
/// Get the number of allocatable registers that match the constraints of \p Reg
|
|
|
|
/// on \p MI and that are also in \p SuperRC.
|
|
|
|
static unsigned getNumAllocatableRegsForConstraints(
|
2020-11-03 12:07:58 +08:00
|
|
|
const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
|
2014-01-03 06:47:22 +08:00
|
|
|
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
|
|
|
|
const RegisterClassInfo &RCI) {
|
|
|
|
assert(SuperRC && "Invalid register class");
|
|
|
|
|
|
|
|
const TargetRegisterClass *ConstrainedRC =
|
|
|
|
MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
|
|
|
|
/* ExploreBundle */ true);
|
|
|
|
if (!ConstrainedRC)
|
|
|
|
return 0;
|
|
|
|
return RCI.getNumAllocatableRegs(ConstrainedRC);
|
|
|
|
}
|
|
|
|
|
2012-05-24 06:37:27 +08:00
|
|
|
/// tryInstructionSplit - Split a live range around individual instructions.
|
|
|
|
/// This is normally not worthwhile since the spiller is doing essentially the
|
|
|
|
/// same thing. However, when the live range is in a constrained register
|
|
|
|
/// class, it may help to insert copies such that parts of the live range can
|
|
|
|
/// be moved to a larger register class.
|
|
|
|
///
|
|
|
|
/// This is similar to spilling to a larger register class.
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
|
|
|
|
AllocationOrder &Order,
|
|
|
|
SmallVectorImpl<Register> &NewVRegs) {
|
2020-09-16 05:54:38 +08:00
|
|
|
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
|
2012-05-24 06:37:27 +08:00
|
|
|
// There is no point to this if there are no larger sub-classes.
|
2014-01-03 06:47:22 +08:00
|
|
|
if (!RegClassInfo.isProperSubClass(CurRC))
|
2012-05-24 06:37:27 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Always enable split spill mode, since we're effectively spilling to a
|
|
|
|
// register.
|
2016-04-13 11:08:27 +08:00
|
|
|
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
|
2012-05-24 06:37:27 +08:00
|
|
|
SE->reset(LREdit, SplitEditor::SM_Size);
|
|
|
|
|
|
|
|
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
|
|
|
|
if (Uses.size() <= 1)
|
|
|
|
return 0;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
|
|
|
|
<< " individual instrs.\n");
|
2012-05-24 06:37:27 +08:00
|
|
|
|
2015-03-11 07:46:01 +08:00
|
|
|
const TargetRegisterClass *SuperRC =
|
|
|
|
TRI->getLargestLegalSuperClass(CurRC, *MF);
|
2014-01-03 06:47:22 +08:00
|
|
|
unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
|
|
|
|
// Split around every non-copy instruction if this split will relax
|
|
|
|
// the constraints on the virtual register.
|
|
|
|
// Otherwise, splitting just inserts uncoalescable copies that do not help
|
|
|
|
// the allocation.
|
2021-09-02 06:31:17 +08:00
|
|
|
for (const SlotIndex Use : Uses) {
|
2020-09-22 07:27:09 +08:00
|
|
|
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
|
2014-01-03 06:47:22 +08:00
|
|
|
if (MI->isFullCopy() ||
|
|
|
|
SuperRCNumAllocatableRegs ==
|
2020-09-16 05:54:38 +08:00
|
|
|
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
|
|
|
|
TII, TRI, RCI)) {
|
2020-09-22 07:27:09 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
|
2012-05-24 06:37:27 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
SE->openIntv();
|
2020-09-22 07:27:09 +08:00
|
|
|
SlotIndex SegStart = SE->enterIntvBefore(Use);
|
|
|
|
SlotIndex SegStop = SE->leaveIntvAfter(Use);
|
2012-05-24 06:37:27 +08:00
|
|
|
SE->useIntv(SegStart, SegStop);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (LREdit.empty()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "All uses were copies.\n");
|
2012-05-24 06:37:27 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SmallVector<unsigned, 8> IntvMap;
|
|
|
|
SE->finish(&IntvMap);
|
2020-09-16 05:54:38 +08:00
|
|
|
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
|
2012-05-24 06:37:27 +08:00
|
|
|
// Assign all new registers to RS_Spill. This was the last chance.
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
|
2012-05-24 06:37:27 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-02-18 03:13:53 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Local Splitting
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
|
|
|
|
/// in order to use PhysReg between two entries in SA->UseSlots.
|
|
|
|
///
|
2020-09-22 07:27:09 +08:00
|
|
|
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
|
2011-02-18 03:13:53 +08:00
|
|
|
///
|
2020-10-23 01:30:30 +08:00
|
|
|
void RAGreedy::calcGapWeights(MCRegister PhysReg,
|
2011-02-18 03:13:53 +08:00
|
|
|
SmallVectorImpl<float> &GapWeight) {
|
2011-04-06 11:57:00 +08:00
|
|
|
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
|
|
|
|
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
|
2012-01-13 01:53:44 +08:00
|
|
|
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
|
2011-02-18 03:13:53 +08:00
|
|
|
const unsigned NumGaps = Uses.size()-1;
|
|
|
|
|
|
|
|
// Start and end points for the interference check.
|
2011-08-03 06:54:14 +08:00
|
|
|
SlotIndex StartIdx =
|
|
|
|
BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
|
|
|
|
SlotIndex StopIdx =
|
|
|
|
BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
|
2011-02-18 03:13:53 +08:00
|
|
|
|
|
|
|
GapWeight.assign(NumGaps, 0.0f);
|
|
|
|
|
|
|
|
// Add interference from each overlapping register.
|
2012-06-21 06:52:26 +08:00
|
|
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
|
|
|
if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
|
|
|
|
.checkInterference())
|
2011-02-18 03:13:53 +08:00
|
|
|
continue;
|
|
|
|
|
2011-08-03 06:54:14 +08:00
|
|
|
// We know that VirtReg is a continuous interval from FirstInstr to
|
|
|
|
// LastInstr, so we don't need InterferenceQuery.
|
2011-02-18 03:13:53 +08:00
|
|
|
//
|
|
|
|
// Interference that overlaps an instruction is counted in both gaps
|
|
|
|
// surrounding the instruction. The exception is interference before
|
|
|
|
// StartIdx and after StopIdx.
|
|
|
|
//
|
2012-06-21 06:52:26 +08:00
|
|
|
LiveIntervalUnion::SegmentIter IntI =
|
|
|
|
Matrix->getLiveUnions()[*Units] .find(StartIdx);
|
2011-02-18 03:13:53 +08:00
|
|
|
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
|
|
|
|
// Skip the gaps before IntI.
|
|
|
|
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
|
|
|
|
if (++Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
if (Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Update the gaps covered by IntI.
|
2020-09-16 05:54:38 +08:00
|
|
|
const float weight = IntI.value()->weight();
|
2011-02-18 03:13:53 +08:00
|
|
|
for (; Gap != NumGaps; ++Gap) {
|
|
|
|
GapWeight[Gap] = std::max(GapWeight[Gap], weight);
|
|
|
|
if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2012-06-21 06:52:26 +08:00
|
|
|
|
|
|
|
// Add fixed interference.
|
|
|
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
2013-10-11 05:29:02 +08:00
|
|
|
const LiveRange &LR = LIS->getRegUnit(*Units);
|
|
|
|
LiveRange::const_iterator I = LR.find(StartIdx);
|
|
|
|
LiveRange::const_iterator E = LR.end();
|
2012-06-21 06:52:26 +08:00
|
|
|
|
|
|
|
// Same loop as above. Mark any overlapped gaps as HUGE_VALF.
|
|
|
|
for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
|
|
|
|
while (Uses[Gap+1].getBoundaryIndex() < I->start)
|
|
|
|
if (++Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
if (Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
|
|
|
|
for (; Gap != NumGaps; ++Gap) {
|
2017-06-07 06:22:41 +08:00
|
|
|
GapWeight[Gap] = huge_valf;
|
2012-06-21 06:52:26 +08:00
|
|
|
if (Uses[Gap+1].getBaseIndex() >= I->end)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (Gap == NumGaps)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-02-18 03:13:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
|
|
|
|
/// basic block.
|
|
|
|
///
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
|
|
|
|
AllocationOrder &Order,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs) {
|
2018-09-21 04:05:57 +08:00
|
|
|
// TODO: the function currently only handles a single UseBlock; it should be
|
|
|
|
// possible to generalize.
|
|
|
|
if (SA->getUseBlocks().size() != 1)
|
|
|
|
return 0;
|
|
|
|
|
2011-04-06 11:57:00 +08:00
|
|
|
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
|
2011-02-18 03:13:53 +08:00
|
|
|
|
|
|
|
// Note that it is possible to have an interval that is live-in or live-out
|
|
|
|
// while only covering a single block - A phi-def can use undef values from
|
|
|
|
// predecessors, and the block could be a single-block loop.
|
|
|
|
// We don't bother doing anything clever about such a case, we simply assume
|
2011-08-03 06:54:14 +08:00
|
|
|
// that the interval is continuous from FirstInstr to LastInstr. We should
|
|
|
|
// make sure that we don't do anything illegal to such an interval, though.
|
2011-02-18 03:13:53 +08:00
|
|
|
|
2012-01-13 01:53:44 +08:00
|
|
|
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
|
2011-02-18 03:13:53 +08:00
|
|
|
if (Uses.size() <= 2)
|
|
|
|
return 0;
|
|
|
|
const unsigned NumGaps = Uses.size()-1;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG({
|
2011-02-18 03:13:53 +08:00
|
|
|
dbgs() << "tryLocalSplit: ";
|
2020-09-22 07:27:09 +08:00
|
|
|
for (const auto &Use : Uses)
|
|
|
|
dbgs() << ' ' << Use;
|
2011-02-18 03:13:53 +08:00
|
|
|
dbgs() << '\n';
|
|
|
|
});
|
|
|
|
|
2012-02-11 08:42:18 +08:00
|
|
|
// If VirtReg is live across any register mask operands, compute a list of
|
|
|
|
// gaps with register masks.
|
|
|
|
SmallVector<unsigned, 8> RegMaskGaps;
|
2012-06-21 06:52:26 +08:00
|
|
|
if (Matrix->checkRegMaskInterference(VirtReg)) {
|
2012-02-11 08:42:18 +08:00
|
|
|
// Get regmask slots for the whole block.
|
|
|
|
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
|
2012-02-11 08:42:18 +08:00
|
|
|
// Constrain to VirtReg's live range.
|
2020-09-22 07:27:09 +08:00
|
|
|
unsigned RI =
|
2019-04-12 10:02:06 +08:00
|
|
|
llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
|
2020-09-22 07:27:09 +08:00
|
|
|
unsigned RE = RMS.size();
|
|
|
|
for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
|
|
|
|
// Look for Uses[I] <= RMS <= Uses[I + 1].
|
|
|
|
assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
|
|
|
|
if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
|
2012-02-11 08:42:18 +08:00
|
|
|
continue;
|
2012-02-15 07:51:27 +08:00
|
|
|
// Skip a regmask on the same instruction as the last use. It doesn't
|
|
|
|
// overlap the live range.
|
2020-09-22 07:27:09 +08:00
|
|
|
if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
|
2012-02-15 07:51:27 +08:00
|
|
|
break;
|
2020-09-22 07:27:09 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
|
|
|
|
<< Uses[I + 1]);
|
|
|
|
RegMaskGaps.push_back(I);
|
2012-02-15 07:51:27 +08:00
|
|
|
// Advance ri to the next gap. A regmask on one of the uses counts in
|
|
|
|
// both gaps.
|
2020-09-22 07:27:09 +08:00
|
|
|
while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
|
|
|
|
++RI;
|
2012-02-11 08:42:18 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << '\n');
|
2012-02-11 08:42:18 +08:00
|
|
|
}
|
|
|
|
|
2011-06-07 07:55:20 +08:00
|
|
|
// Since we allow local split results to be split again, there is a risk of
|
|
|
|
// creating infinite loops. It is tempting to require that the new live
|
|
|
|
// ranges have less instructions than the original. That would guarantee
|
|
|
|
// convergence, but it is too strict. A live range with 3 instructions can be
|
|
|
|
// split 2+3 (including the COPY), and we want to allow that.
|
|
|
|
//
|
|
|
|
// Instead we use these rules:
|
|
|
|
//
|
2011-07-25 23:25:43 +08:00
|
|
|
// 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
|
2011-06-07 07:55:20 +08:00
|
|
|
// noop split, of course).
|
2011-07-25 23:25:43 +08:00
|
|
|
// 2. Require progress be made for ranges with getStage() == RS_Split2. All
|
2011-06-07 07:55:20 +08:00
|
|
|
// the new ranges must have fewer instructions than before the split.
|
2011-07-25 23:25:43 +08:00
|
|
|
// 3. New ranges with the same number of instructions are marked RS_Split2,
|
2011-06-07 07:55:20 +08:00
|
|
|
// smaller ranges are marked RS_New.
|
|
|
|
//
|
|
|
|
// These rules allow a 3 -> 2+3 split once, which we need. They also prevent
|
|
|
|
// excessive splitting and infinite loops.
|
|
|
|
//
|
2021-12-01 08:30:16 +08:00
|
|
|
bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
|
2011-02-18 03:13:53 +08:00
|
|
|
|
2011-06-07 07:55:20 +08:00
|
|
|
// Best split candidate.
|
2011-02-18 03:13:53 +08:00
|
|
|
unsigned BestBefore = NumGaps;
|
|
|
|
unsigned BestAfter = 0;
|
|
|
|
float BestDiff = 0;
|
|
|
|
|
2013-07-17 02:26:18 +08:00
|
|
|
const float blockFreq =
|
|
|
|
SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
|
2013-12-14 10:37:38 +08:00
|
|
|
(1.0f / MBFI->getEntryFreq());
|
2011-02-18 03:13:53 +08:00
|
|
|
SmallVector<float, 8> GapWeight;
|
|
|
|
|
2020-09-24 12:58:45 +08:00
|
|
|
for (MCPhysReg PhysReg : Order) {
|
|
|
|
assert(PhysReg);
|
2011-02-18 03:13:53 +08:00
|
|
|
// Keep track of the largest spill weight that would need to be evicted in
|
2020-09-22 07:27:09 +08:00
|
|
|
// order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
|
2011-02-18 03:13:53 +08:00
|
|
|
calcGapWeights(PhysReg, GapWeight);
|
|
|
|
|
2012-02-11 08:42:18 +08:00
|
|
|
// Remove any gaps with regmask clobbers.
|
2012-06-21 06:52:26 +08:00
|
|
|
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
|
|
|
|
GapWeight[RegMaskGaps[I]] = huge_valf;
|
2012-02-11 08:42:18 +08:00
|
|
|
|
2011-02-18 03:13:53 +08:00
|
|
|
// Try to find the best sequence of gaps to close.
|
|
|
|
// The new spill weight must be larger than any gap interference.
|
|
|
|
|
|
|
|
// We will split before Uses[SplitBefore] and after Uses[SplitAfter].
|
2011-06-07 07:55:20 +08:00
|
|
|
unsigned SplitBefore = 0, SplitAfter = 1;
|
2011-02-18 03:13:53 +08:00
|
|
|
|
|
|
|
// MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
|
|
|
|
// It is the spill weight that needs to be evicted.
|
|
|
|
float MaxGap = GapWeight[0];
|
|
|
|
|
2017-06-07 06:22:41 +08:00
|
|
|
while (true) {
|
2011-02-18 03:13:53 +08:00
|
|
|
// Live before/after split?
|
|
|
|
const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
|
|
|
|
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
|
2020-09-22 07:27:09 +08:00
|
|
|
<< '-' << Uses[SplitAfter] << " I=" << MaxGap);
|
2011-02-18 03:13:53 +08:00
|
|
|
|
|
|
|
// Stop before the interval gets so big we wouldn't be making progress.
|
|
|
|
if (!LiveBefore && !LiveAfter) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " all\n");
|
2011-02-18 03:13:53 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
// Should the interval be extended or shrunk?
|
|
|
|
bool Shrink = true;
|
|
|
|
|
2011-06-07 07:55:20 +08:00
|
|
|
// How many gaps would the new range have?
|
|
|
|
unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
|
|
|
|
|
|
|
|
// Legally, without causing looping?
|
|
|
|
bool Legal = !ProgressRequired || NewGaps < NumGaps;
|
|
|
|
|
2017-06-07 06:22:41 +08:00
|
|
|
if (Legal && MaxGap < huge_valf) {
|
2011-06-07 07:55:20 +08:00
|
|
|
// Estimate the new spill weight. Each instruction reads or writes the
|
|
|
|
// register. Conservatively assume there are no read-modify-write
|
|
|
|
// instructions.
|
2011-02-18 03:13:53 +08:00
|
|
|
//
|
2011-06-07 07:55:20 +08:00
|
|
|
// Try to guess the size of the new interval.
|
[PBQP] Tweak spill costs and coalescing benefits
This patch improves how the different costs (register, interference, spill
and coalescing) relates together. The assumption is now that:
- coalescing (or any other "side effect" of reg alloc) is negative, and
instead of being derived from a spill cost, they use the block
frequency info.
- spill costs are in the [MinSpillCost:+inf( range
- register or interference costs are in [0.0:MinSpillCost( or +inf
The current MinSpillCost is set to 10.0, which is a random value high
enough that the current constraint builders do not need to worry about
when settings costs. It would however be worth adding a normalization
step for register and interference costs as the last step in the
constraint builder chain to ensure they are not greater than SpillMinCost
(unless this has some sense for some architectures). This would work well
with the current builder pipeline, where all costs are tweaked relatively
to each others, but could grow above MinSpillCost if the pipeline is
deep enough.
The current heuristic is tuned to depend rather on the number of uses of
a live interval rather than a density of uses, as used by the greedy
allocator. This heuristic provides a few percent improvement on a number
of benchmarks (eembc, spec, ...) and will definitely need to change once
spill placement is implemented: the current spill placement is really
ineficient, so making the cost proportionnal to the number of use is a
clear win.
llvm-svn: 221292
2014-11-05 04:51:24 +08:00
|
|
|
const float EstWeight = normalizeSpillWeight(
|
|
|
|
blockFreq * (NewGaps + 1),
|
|
|
|
Uses[SplitBefore].distance(Uses[SplitAfter]) +
|
|
|
|
(LiveBefore + LiveAfter) * SlotIndex::InstrDist,
|
|
|
|
1);
|
2011-02-18 03:13:53 +08:00
|
|
|
// Would this split be possible to allocate?
|
|
|
|
// Never allocate all gaps, we wouldn't be making progress.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " w=" << EstWeight);
|
2011-04-30 13:07:46 +08:00
|
|
|
if (EstWeight * Hysteresis >= MaxGap) {
|
2011-02-18 03:13:53 +08:00
|
|
|
Shrink = false;
|
2011-04-30 13:07:46 +08:00
|
|
|
float Diff = EstWeight - MaxGap;
|
2011-02-18 03:13:53 +08:00
|
|
|
if (Diff > BestDiff) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " (best)");
|
2011-04-30 13:07:46 +08:00
|
|
|
BestDiff = Hysteresis * Diff;
|
2011-02-18 03:13:53 +08:00
|
|
|
BestBefore = SplitBefore;
|
|
|
|
BestAfter = SplitAfter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to shrink.
|
|
|
|
if (Shrink) {
|
2011-06-07 07:55:20 +08:00
|
|
|
if (++SplitBefore < SplitAfter) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " shrink\n");
|
2011-02-18 03:13:53 +08:00
|
|
|
// Recompute the max when necessary.
|
|
|
|
if (GapWeight[SplitBefore - 1] >= MaxGap) {
|
|
|
|
MaxGap = GapWeight[SplitBefore];
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
|
|
|
|
MaxGap = std::max(MaxGap, GapWeight[I]);
|
2011-02-18 03:13:53 +08:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
MaxGap = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to extend the interval.
|
|
|
|
if (SplitAfter >= NumGaps) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " end\n");
|
2011-02-18 03:13:53 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " extend\n");
|
2011-06-07 07:55:20 +08:00
|
|
|
MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
|
2011-02-18 03:13:53 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Didn't find any candidates?
|
|
|
|
if (BestBefore == NumGaps)
|
|
|
|
return 0;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
|
|
|
|
<< Uses[BestAfter] << ", " << BestDiff << ", "
|
|
|
|
<< (BestAfter - BestBefore + 1) << " instrs\n");
|
2011-02-18 03:13:53 +08:00
|
|
|
|
2016-04-13 11:08:27 +08:00
|
|
|
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
|
2011-03-03 09:29:13 +08:00
|
|
|
SE->reset(LREdit);
|
|
|
|
|
|
|
|
SE->openIntv();
|
|
|
|
SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
|
|
|
|
SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
|
|
|
|
SE->useIntv(SegStart, SegStop);
|
2011-06-07 07:55:20 +08:00
|
|
|
SmallVector<unsigned, 8> IntvMap;
|
|
|
|
SE->finish(&IntvMap);
|
2020-09-16 05:54:38 +08:00
|
|
|
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
|
2011-06-07 07:55:20 +08:00
|
|
|
// If the new range has the same number of instructions as before, mark it as
|
2011-07-25 23:25:43 +08:00
|
|
|
// RS_Split2 so the next split will be forced to make progress. Otherwise,
|
2011-06-07 07:55:20 +08:00
|
|
|
// leave the new intervals as RS_New so they can compete.
|
|
|
|
bool LiveBefore = BestBefore != 0 || BI.LiveIn;
|
|
|
|
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
|
|
|
|
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
|
|
|
|
if (NewGaps >= NumGaps) {
|
2021-08-03 09:12:21 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
|
2011-06-07 07:55:20 +08:00
|
|
|
assert(!ProgressRequired && "Didn't make progress when it was required.");
|
2020-09-22 07:27:09 +08:00
|
|
|
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
|
|
|
|
if (IntvMap[I] == 1) {
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
|
2021-08-03 09:12:21 +08:00
|
|
|
LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
|
2011-06-07 07:55:20 +08:00
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << '\n');
|
2011-06-07 07:55:20 +08:00
|
|
|
}
|
2011-02-18 06:53:48 +08:00
|
|
|
++NumLocalSplits;
|
2011-02-18 03:13:53 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-01-20 06:11:48 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Live Range Splitting
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// trySplit - Try to split VirtReg or one of its interferences, making it
|
|
|
|
/// assignable.
|
|
|
|
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
2019-02-20 15:14:39 +08:00
|
|
|
const SmallVirtRegSet &FixedRegisters) {
|
2011-08-06 07:50:33 +08:00
|
|
|
// Ranges must be Split2 or less.
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
|
2011-08-06 07:50:33 +08:00
|
|
|
return 0;
|
|
|
|
|
2011-02-18 03:13:53 +08:00
|
|
|
// Local intervals are handled separately.
|
2011-02-19 08:38:40 +08:00
|
|
|
if (LIS->intervalIsInOneMBB(VirtReg)) {
|
2016-11-19 03:43:18 +08:00
|
|
|
NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
|
|
|
|
TimerGroupDescription, TimePassesIsEnabled);
|
2011-03-02 05:10:07 +08:00
|
|
|
SA->analyze(&VirtReg);
|
2020-06-30 23:57:24 +08:00
|
|
|
Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
|
2012-05-24 06:37:27 +08:00
|
|
|
if (PhysReg || !NewVRegs.empty())
|
|
|
|
return PhysReg;
|
|
|
|
return tryInstructionSplit(VirtReg, Order, NewVRegs);
|
2011-02-19 08:38:40 +08:00
|
|
|
}
|
|
|
|
|
2016-11-19 03:43:18 +08:00
|
|
|
NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
|
|
|
|
TimerGroupDescription, TimePassesIsEnabled);
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-03-02 05:10:07 +08:00
|
|
|
SA->analyze(&VirtReg);
|
|
|
|
|
2011-07-25 23:25:43 +08:00
|
|
|
// First try to split around a region spanning multiple blocks. RS_Split2
|
|
|
|
// ranges already made dubious progress with region splitting, so they go
|
|
|
|
// straight to single block splitting.
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
|
2020-10-23 01:30:30 +08:00
|
|
|
MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
|
2011-07-25 23:25:43 +08:00
|
|
|
if (PhysReg || !NewVRegs.empty())
|
|
|
|
return PhysReg;
|
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-08-06 07:04:18 +08:00
|
|
|
// Then isolate blocks.
|
|
|
|
return tryBlockSplit(VirtReg, Order, NewVRegs);
|
2011-01-20 06:11:48 +08:00
|
|
|
}
|
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Last Chance Recoloring
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-09-28 16:22:35 +08:00
|
|
|
/// Return true if \p reg has any tied def operand.
|
|
|
|
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
|
|
|
|
for (const MachineOperand &MO : MRI->def_operands(reg))
|
|
|
|
if (MO.isTied())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-04-01 21:59:05 +08:00
|
|
|
/// Return true if the existing assignment of \p Intf overlaps, but is not the
|
|
|
|
/// same, as \p PhysReg.
|
|
|
|
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
|
|
|
|
const VirtRegMap &VRM,
|
|
|
|
MCRegister PhysReg,
|
|
|
|
const LiveInterval &Intf) {
|
|
|
|
MCRegister AssignedReg = VRM.getPhys(Intf.reg());
|
|
|
|
if (PhysReg == AssignedReg)
|
|
|
|
return false;
|
|
|
|
return TRI.regsOverlap(PhysReg, AssignedReg);
|
|
|
|
}
|
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
/// mayRecolorAllInterferences - Check if the virtual registers that
|
|
|
|
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
|
|
|
|
/// recolored to free \p PhysReg.
|
|
|
|
/// When true is returned, \p RecoloringCandidates has been augmented with all
|
|
|
|
/// the live intervals that need to be recolored in order to free \p PhysReg
|
|
|
|
/// for \p VirtReg.
|
|
|
|
/// \p FixedRegisters contains all the virtual registers that cannot be
|
|
|
|
/// recolored.
|
2020-10-23 01:30:30 +08:00
|
|
|
bool RAGreedy::mayRecolorAllInterferences(
|
2022-02-04 01:07:42 +08:00
|
|
|
MCRegister PhysReg, const LiveInterval &VirtReg,
|
|
|
|
SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
|
2020-09-16 05:54:38 +08:00
|
|
|
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
|
2014-02-06 06:13:59 +08:00
|
|
|
|
|
|
|
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
|
|
|
|
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
|
|
|
|
// If there is LastChanceRecoloringMaxInterference or more interferences,
|
|
|
|
// chances are one would not be recolorable.
|
2021-10-31 11:47:22 +08:00
|
|
|
if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
|
|
|
|
LastChanceRecoloringMaxInterference &&
|
|
|
|
!ExhaustiveSearch) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
|
2014-04-04 10:05:21 +08:00
|
|
|
CutOffInfo |= CO_Interf;
|
2014-02-06 06:13:59 +08:00
|
|
|
return false;
|
|
|
|
}
|
2022-02-04 01:07:42 +08:00
|
|
|
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
|
2022-04-01 21:59:05 +08:00
|
|
|
// If Intf is done and sits on the same register class as VirtReg, it
|
|
|
|
// would not be recolorable as it is in the same state as
|
|
|
|
// VirtReg. However there are at least two exceptions.
|
|
|
|
//
|
|
|
|
// If VirtReg has tied defs and Intf doesn't, then
|
2017-09-28 16:22:35 +08:00
|
|
|
// there is still a point in examining if it can be recolorable.
|
2022-04-01 21:59:05 +08:00
|
|
|
//
|
|
|
|
// Additionally, if the register class has overlapping tuple members, it
|
|
|
|
// may still be recolorable using a different tuple. This is more likely
|
|
|
|
// if the existing assignment aliases with the candidate.
|
|
|
|
//
|
2021-12-01 08:30:16 +08:00
|
|
|
if (((ExtraInfo->getStage(*Intf) == RS_Done &&
|
2022-04-01 21:59:05 +08:00
|
|
|
MRI->getRegClass(Intf->reg()) == CurRC &&
|
|
|
|
!assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
|
2020-09-16 05:54:38 +08:00
|
|
|
!(hasTiedDef(MRI, VirtReg.reg()) &&
|
|
|
|
!hasTiedDef(MRI, Intf->reg()))) ||
|
|
|
|
FixedRegisters.count(Intf->reg())) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Early abort: the interference is not recolorable.\n");
|
2014-02-06 06:13:59 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
RecoloringCandidates.insert(Intf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
|
|
|
|
/// its interferences.
|
|
|
|
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
|
|
|
|
/// virtual register that was using it. The recoloring process may recursively
|
|
|
|
/// use the last chance recoloring. Therefore, when a virtual register has been
|
|
|
|
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
|
|
|
|
/// be last-chance-recolored again during this recoloring "session".
|
|
|
|
/// E.g.,
|
|
|
|
/// Let
|
|
|
|
/// vA can use {R1, R2 }
|
|
|
|
/// vB can use { R2, R3}
|
|
|
|
/// vC can use {R1 }
|
|
|
|
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
|
|
|
|
/// instance) and they all interfere.
|
|
|
|
///
|
|
|
|
/// vA is assigned R1
|
|
|
|
/// vB is assigned R2
|
|
|
|
/// vC tries to evict vA but vA is already done.
|
|
|
|
/// Regular register allocation fails.
|
|
|
|
///
|
|
|
|
/// Last chance recoloring kicks in:
|
|
|
|
/// vC does as if vA was evicted => vC uses R1.
|
|
|
|
/// vC is marked as fixed.
|
|
|
|
/// vA needs to find a color.
|
|
|
|
/// None are available.
|
|
|
|
/// vA cannot evict vC: vC is a fixed virtual register now.
|
|
|
|
/// vA does as if vB was evicted => vA uses R2.
|
|
|
|
/// vB needs to find a color.
|
|
|
|
/// R3 is available.
|
|
|
|
/// Recoloring => vC = R1, vA = R2, vB = R3
|
|
|
|
///
|
2014-02-25 12:21:15 +08:00
|
|
|
/// \p Order defines the preferred allocation order for \p VirtReg.
|
2014-02-06 06:13:59 +08:00
|
|
|
/// \p NewRegs will contain any new virtual register that have been created
|
|
|
|
/// (split, spill) during the process and that must be assigned.
|
|
|
|
/// \p FixedRegisters contains all the virtual registers that cannot be
|
|
|
|
/// recolored.
|
2022-03-18 01:12:36 +08:00
|
|
|
///
|
|
|
|
/// \p RecolorStack tracks the original assignments of successfully recolored
|
|
|
|
/// registers.
|
|
|
|
///
|
2014-02-06 06:13:59 +08:00
|
|
|
/// \p Depth gives the current depth of the last chance recoloring.
|
|
|
|
/// \return a physical register that can be used for VirtReg or ~0u if none
|
|
|
|
/// exists.
|
2022-02-04 01:07:42 +08:00
|
|
|
unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
|
2014-02-06 06:13:59 +08:00
|
|
|
AllocationOrder &Order,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
2014-02-06 06:13:59 +08:00
|
|
|
SmallVirtRegSet &FixedRegisters,
|
2022-03-18 01:12:36 +08:00
|
|
|
RecoloringStack &RecolorStack,
|
2014-02-06 06:13:59 +08:00
|
|
|
unsigned Depth) {
|
2020-09-18 05:47:12 +08:00
|
|
|
if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
|
|
|
|
return ~0u;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
|
2022-03-18 01:12:36 +08:00
|
|
|
|
|
|
|
const ssize_t EntryStackSize = RecolorStack.size();
|
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
// Ranges must be Done.
|
2021-12-01 08:30:16 +08:00
|
|
|
assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
|
2014-02-06 06:13:59 +08:00
|
|
|
"Last chance recoloring should really be last chance");
|
|
|
|
// Set the max depth to LastChanceRecoloringMaxDepth.
|
|
|
|
// We may want to reconsider that if we end up with a too large search space
|
|
|
|
// for target with hundreds of registers.
|
|
|
|
// Indeed, in that case we may want to cut the search space earlier.
|
2014-04-12 05:39:44 +08:00
|
|
|
if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
|
2014-04-04 10:05:21 +08:00
|
|
|
CutOffInfo |= CO_Depth;
|
2014-02-06 06:13:59 +08:00
|
|
|
return ~0u;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set of Live intervals that will need to be recolored.
|
|
|
|
SmallLISet RecoloringCandidates;
|
2022-03-18 01:12:36 +08:00
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
// Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
|
|
|
|
// this recoloring "session".
|
2020-09-16 05:54:38 +08:00
|
|
|
assert(!FixedRegisters.count(VirtReg.reg()));
|
|
|
|
FixedRegisters.insert(VirtReg.reg());
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVector<Register, 4> CurrentNewVRegs;
|
2014-02-06 06:13:59 +08:00
|
|
|
|
2020-09-24 12:58:45 +08:00
|
|
|
for (MCRegister PhysReg : Order) {
|
|
|
|
assert(PhysReg.isValid());
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
|
|
|
|
<< printReg(PhysReg, TRI) << '\n');
|
2014-02-06 06:13:59 +08:00
|
|
|
RecoloringCandidates.clear();
|
2016-09-17 06:00:50 +08:00
|
|
|
CurrentNewVRegs.clear();
|
2014-02-06 06:13:59 +08:00
|
|
|
|
|
|
|
// It is only possible to recolor virtual register interference.
|
|
|
|
if (Matrix->checkInterference(VirtReg, PhysReg) >
|
|
|
|
LiveRegMatrix::IK_VirtReg) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
dbgs() << "Some interferences are not with virtual registers.\n");
|
2014-02-06 06:13:59 +08:00
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Early give up on this PhysReg if it is obvious we cannot recolor all
|
|
|
|
// the interferences.
|
|
|
|
if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
|
|
|
|
FixedRegisters)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
|
2014-02-06 06:13:59 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-03-01 07:26:41 +08:00
|
|
|
// RecoloringCandidates contains all the virtual registers that interfere
|
|
|
|
// with VirtReg on PhysReg (or one of its aliases). Enqueue them for
|
|
|
|
// recoloring and perform the actual recoloring.
|
2014-02-06 06:13:59 +08:00
|
|
|
PQueue RecoloringQueue;
|
2022-02-04 01:07:42 +08:00
|
|
|
for (const LiveInterval *RC : RecoloringCandidates) {
|
2021-02-18 15:58:46 +08:00
|
|
|
Register ItVirtReg = RC->reg();
|
|
|
|
enqueue(RecoloringQueue, RC);
|
2014-02-06 06:13:59 +08:00
|
|
|
assert(VRM->hasPhys(ItVirtReg) &&
|
2018-06-20 13:29:26 +08:00
|
|
|
"Interferences are supposed to be with allocated variables");
|
2014-02-06 06:13:59 +08:00
|
|
|
|
|
|
|
// Record the current allocation.
|
2022-03-18 01:12:36 +08:00
|
|
|
RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
|
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
// unset the related struct.
|
2021-02-18 15:58:46 +08:00
|
|
|
Matrix->unassign(*RC);
|
2014-02-06 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do as if VirtReg was assigned to PhysReg so that the underlying
|
|
|
|
// recoloring has the right information about the interferences and
|
|
|
|
// available colors.
|
|
|
|
Matrix->assign(VirtReg, PhysReg);
|
|
|
|
|
|
|
|
// Save the current recoloring state.
|
|
|
|
// If we cannot recolor all the interferences, we will have to start again
|
|
|
|
// at this point for the next physical register.
|
|
|
|
SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
|
2016-09-17 06:00:50 +08:00
|
|
|
if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
|
2022-03-18 01:12:36 +08:00
|
|
|
FixedRegisters, RecolorStack, Depth)) {
|
2016-09-17 06:00:50 +08:00
|
|
|
// Push the queued vregs into the main queue.
|
2020-06-30 23:57:24 +08:00
|
|
|
for (Register NewVReg : CurrentNewVRegs)
|
2016-09-17 06:00:50 +08:00
|
|
|
NewVRegs.push_back(NewVReg);
|
2014-02-06 06:13:59 +08:00
|
|
|
// Do not mess up with the global assignment process.
|
|
|
|
// I.e., VirtReg must be unassigned.
|
|
|
|
Matrix->unassign(VirtReg);
|
|
|
|
return PhysReg;
|
|
|
|
}
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
|
|
|
|
<< printReg(PhysReg, TRI) << '\n');
|
2014-02-06 06:13:59 +08:00
|
|
|
|
|
|
|
// The recoloring attempt failed, undo the changes.
|
|
|
|
FixedRegisters = SaveFixedRegisters;
|
|
|
|
Matrix->unassign(VirtReg);
|
|
|
|
|
2016-11-09 02:19:36 +08:00
|
|
|
// For a newly created vreg which is also in RecoloringCandidates,
|
|
|
|
// don't add it to NewVRegs because its physical register will be restored
|
|
|
|
// below. Other vregs in CurrentNewVRegs are created by calling
|
|
|
|
// selectOrSplit and should be added into NewVRegs.
|
2021-02-18 15:58:46 +08:00
|
|
|
for (Register &R : CurrentNewVRegs) {
|
|
|
|
if (RecoloringCandidates.count(&LIS->getInterval(R)))
|
2016-09-17 06:00:50 +08:00
|
|
|
continue;
|
2021-02-18 15:58:46 +08:00
|
|
|
NewVRegs.push_back(R);
|
2016-09-17 06:00:50 +08:00
|
|
|
}
|
|
|
|
|
2022-03-18 01:12:36 +08:00
|
|
|
// Roll back our unsuccessful recoloring. Also roll back any successful
|
|
|
|
// recolorings in any recursive recoloring attempts, since it's possible
|
|
|
|
// they would have introduced conflicts with assignments we will be
|
|
|
|
// restoring further up the stack. Perform all unassignments prior to
|
|
|
|
// reassigning, since sub-recolorings may have conflicted with the registers
|
|
|
|
// we are going to restore to their original assignments.
|
|
|
|
for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
|
|
|
|
const LiveInterval *LI;
|
|
|
|
MCRegister PhysReg;
|
|
|
|
std::tie(LI, PhysReg) = RecolorStack[I];
|
|
|
|
|
|
|
|
if (VRM->hasPhys(LI->reg()))
|
|
|
|
Matrix->unassign(*LI);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
|
|
|
|
const LiveInterval *LI;
|
|
|
|
MCRegister PhysReg;
|
|
|
|
std::tie(LI, PhysReg) = RecolorStack[I];
|
|
|
|
Matrix->assign(*LI, PhysReg);
|
2014-02-06 06:13:59 +08:00
|
|
|
}
|
2022-03-18 01:12:36 +08:00
|
|
|
|
|
|
|
// Pop the stack of recoloring attempts.
|
|
|
|
RecolorStack.resize(EntryStackSize);
|
2014-02-06 06:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Last chance recoloring did not work either, give up.
|
|
|
|
return ~0u;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// tryRecoloringCandidates - Try to assign a new color to every register
|
|
|
|
/// in \RecoloringQueue.
|
|
|
|
/// \p NewRegs will contain any new virtual register created during the
|
|
|
|
/// recoloring process.
|
|
|
|
/// \p FixedRegisters[in/out] contains all the registers that have been
|
|
|
|
/// recolored.
|
|
|
|
/// \return true if all virtual registers in RecoloringQueue were successfully
|
|
|
|
/// recolored, false otherwise.
|
|
|
|
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
|
2020-06-30 23:57:24 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
2014-02-06 06:13:59 +08:00
|
|
|
SmallVirtRegSet &FixedRegisters,
|
2022-03-18 01:12:36 +08:00
|
|
|
RecoloringStack &RecolorStack,
|
2014-02-06 06:13:59 +08:00
|
|
|
unsigned Depth) {
|
|
|
|
while (!RecoloringQueue.empty()) {
|
2022-02-04 01:07:42 +08:00
|
|
|
const LiveInterval *LI = dequeue(RecoloringQueue);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
|
2022-03-18 01:12:36 +08:00
|
|
|
MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
|
|
|
|
RecolorStack, Depth + 1);
|
2016-10-14 03:27:48 +08:00
|
|
|
// When splitting happens, the live-range may actually be empty.
|
|
|
|
// In that case, this is okay to continue the recoloring even
|
|
|
|
// if we did not find an alternative color for it. Indeed,
|
|
|
|
// there will not be anything to color for LI in the end.
|
|
|
|
if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
|
2014-02-06 06:13:59 +08:00
|
|
|
return false;
|
2016-10-14 03:27:48 +08:00
|
|
|
|
|
|
|
if (!PhysReg) {
|
|
|
|
assert(LI->empty() && "Only empty live-range do not require a register");
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
|
|
|
|
<< " succeeded. Empty LI.\n");
|
2016-10-14 03:27:48 +08:00
|
|
|
continue;
|
|
|
|
}
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
|
|
|
|
<< " succeeded with: " << printReg(PhysReg, TRI) << '\n');
|
2016-10-14 03:27:48 +08:00
|
|
|
|
2014-02-06 06:13:59 +08:00
|
|
|
Matrix->assign(*LI, PhysReg);
|
2020-09-16 05:54:38 +08:00
|
|
|
FixedRegisters.insert(LI->reg());
|
2014-02-06 06:13:59 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2010-12-23 06:01:30 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Main Entry Point
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
/// Top-level allocation query for \p VirtReg: resets the recorded cut-off
/// state, runs selectOrSplitImpl with an empty set of fixed registers and an
/// empty recoloring stack, and reports an error through the LLVMContext when
/// the result is ~0U and a recoloring cut-off was recorded on the way.
/// \p NewVRegs is handed to selectOrSplitImpl unchanged.
/// \return whatever selectOrSplitImpl returned.
MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
                                   SmallVectorImpl<Register> &NewVRegs) {
  CutOffInfo = CO_None;

  SmallVirtRegSet FixedRegisters;
  RecoloringStack RecolorStack;
  MCRegister Reg =
      selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);

  // Nothing to diagnose unless allocation failed and a cut-off was recorded.
  const bool AllocationFailed = Reg == ~0U;
  if (!AllocationFailed || CutOffInfo == CO_None)
    return Reg;

  // Only the depth and interference bits select a diagnostic.
  const uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
  const char *Diagnostic = nullptr;
  if (CutOffEncountered == CO_Depth)
    Diagnostic = "register allocation failed: maximum depth for recoloring "
                 "reached. Use -fexhaustive-register-search to skip "
                 "cutoffs";
  else if (CutOffEncountered == CO_Interf)
    Diagnostic = "register allocation failed: maximum interference for "
                 "recoloring reached. Use -fexhaustive-register-search "
                 "to skip cutoffs";
  else if (CutOffEncountered == (CO_Depth | CO_Interf))
    Diagnostic = "register allocation failed: maximum interference and "
                 "depth for recoloring reached. Use "
                 "-fexhaustive-register-search to skip cutoffs";

  if (Diagnostic) {
    LLVMContext &Ctx = MF->getFunction().getContext();
    Ctx.emitError(Diagnostic);
  }
  return Reg;
}
|
|
|
|
|
2014-03-28 05:21:57 +08:00
|
|
|
/// Using a CSR for the first time has a cost because it causes push|pop
|
|
|
|
/// to be added to prologue|epilogue. Splitting a cold section of the live
|
|
|
|
/// range can have lower cost than using the CSR for the first time;
|
|
|
|
/// Spilling a live range in the cold path can have lower cost than using
|
|
|
|
/// the CSR for the first time. Returns the physical register if we decide
|
|
|
|
/// to use the CSR; otherwise return 0.
|
2022-02-04 01:07:42 +08:00
|
|
|
MCRegister RAGreedy::tryAssignCSRFirstTime(
|
|
|
|
const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
|
|
|
|
uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
|
2014-03-28 05:21:57 +08:00
|
|
|
// We choose spill over using the CSR for the first time if the spill cost
|
|
|
|
// is lower than CSRCost.
|
|
|
|
SA->analyze(&VirtReg);
|
|
|
|
if (calcSpillCost() >= CSRCost)
|
|
|
|
return PhysReg;
|
|
|
|
|
|
|
|
// We are going to spill, set CostPerUseLimit to 1 to make sure that
|
|
|
|
// we will not use a callee-saved register in tryEvict.
|
|
|
|
CostPerUseLimit = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2021-12-01 08:30:16 +08:00
|
|
|
if (ExtraInfo->getStage(VirtReg) < RS_Split) {
|
2014-03-28 05:21:57 +08:00
|
|
|
// We choose pre-splitting over using the CSR for the first time if
|
|
|
|
// the cost of splitting is lower than CSRCost.
|
|
|
|
SA->analyze(&VirtReg);
|
|
|
|
unsigned NumCands = 0;
|
2014-04-09 03:18:56 +08:00
|
|
|
BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
|
|
|
|
unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
|
|
|
|
NumCands, true /*IgnoreCSR*/);
|
2014-03-28 05:21:57 +08:00
|
|
|
if (BestCand == NoCand)
|
|
|
|
// Use the CSR if we can't find a region split below CSRCost.
|
|
|
|
return PhysReg;
|
|
|
|
|
|
|
|
// Perform the actual pre-splitting.
|
|
|
|
doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return PhysReg;
|
|
|
|
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// Do not keep invalid information around.
|
|
|
|
SetOfBrokenHints.remove(&LI);
|
|
|
|
}
|
|
|
|
|
2014-04-09 03:18:56 +08:00
|
|
|
void RAGreedy::initializeCSRCost() {
|
|
|
|
// We use the larger one out of the command-line option and the value report
|
|
|
|
// by TRI.
|
|
|
|
CSRCost = BlockFrequency(
|
|
|
|
std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
|
|
|
|
if (!CSRCost.getFrequency())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Raw cost is relative to Entry == 2^14; scale it appropriately.
|
|
|
|
uint64_t ActualEntry = MBFI->getEntryFreq();
|
|
|
|
if (!ActualEntry) {
|
|
|
|
CSRCost = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
uint64_t FixedEntry = 1 << 14;
|
|
|
|
if (ActualEntry < FixedEntry)
|
|
|
|
CSRCost *= BranchProbability(ActualEntry, FixedEntry);
|
|
|
|
else if (ActualEntry <= UINT32_MAX)
|
|
|
|
// Invert the fraction and divide.
|
|
|
|
CSRCost /= BranchProbability(FixedEntry, ActualEntry);
|
|
|
|
else
|
|
|
|
// Can't use BranchProbability in general, since it takes 32-bit numbers.
|
|
|
|
CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Collect the hint info for \p Reg.
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
/// The results are stored into \p Out.
|
|
|
|
/// \p Out is not cleared before being populated.
|
2020-11-03 12:07:58 +08:00
|
|
|
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
|
|
|
|
if (!Instr.isFullCopy())
|
|
|
|
continue;
|
|
|
|
// Look for the other end of the copy.
|
2019-06-24 23:50:29 +08:00
|
|
|
Register OtherReg = Instr.getOperand(0).getReg();
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
if (OtherReg == Reg) {
|
|
|
|
OtherReg = Instr.getOperand(1).getReg();
|
|
|
|
if (OtherReg == Reg)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Get the current assignment.
|
2020-11-03 12:07:58 +08:00
|
|
|
MCRegister OtherPhysReg =
|
|
|
|
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// Push the collected information.
|
|
|
|
Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
|
|
|
|
OtherPhysReg));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Using the given \p List, compute the cost of the broken hints if
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
/// \p PhysReg was used.
|
|
|
|
/// \return The cost of \p List for \p PhysReg.
|
|
|
|
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           MCRegister PhysReg) {
  // Sum the frequencies of every hint in List whose register differs from
  // PhysReg; hints whose register equals PhysReg add nothing to the total.
  BlockFrequency BrokenFreq = 0;
  for (const HintInfo &Hint : List) {
    // Same register as PhysReg: nothing to add for this entry.
    if (Hint.PhysReg == PhysReg)
      continue;
    BrokenFreq += Hint.Freq;
  }
  return BrokenFreq;
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Using the register assigned to \p VirtReg, try to recolor
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater than or
equal to b), then it is profitable and the recoloring happens.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
/// all the live ranges that are copy-related with \p VirtReg.
|
|
|
|
/// The recoloring is then propagated to all the live-ranges that have
|
|
|
|
/// been recolored and so on, until no more copies can be coalesced or
|
|
|
|
/// it is not profitable.
|
|
|
|
/// For a given live range, profitability is determined by the sum of the
|
|
|
|
/// frequencies of the non-identity copies it would introduce with the old
|
|
|
|
/// and new register.
|
2022-02-04 01:07:42 +08:00
|
|
|
void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// We have a broken hint, check if it is possible to fix it by
|
|
|
|
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
|
|
|
|
// some register and PhysReg may be available for the other live-ranges.
|
2020-11-03 12:07:58 +08:00
|
|
|
SmallSet<Register, 4> Visited;
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
SmallVector<unsigned, 2> RecoloringCandidates;
|
|
|
|
HintsInfo Info;
|
2020-11-03 12:07:58 +08:00
|
|
|
Register Reg = VirtReg.reg();
|
2020-10-10 01:04:29 +08:00
|
|
|
MCRegister PhysReg = VRM->getPhys(Reg);
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// Start the recoloring algorithm from the input live-interval, then
|
|
|
|
// it will propagate to the ones that are copy-related with it.
|
|
|
|
Visited.insert(Reg);
|
|
|
|
RecoloringCandidates.push_back(Reg);
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
|
|
|
|
<< '(' << printReg(PhysReg, TRI) << ")\n");
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
|
|
|
|
do {
|
|
|
|
Reg = RecoloringCandidates.pop_back_val();
|
|
|
|
|
2017-07-10 20:44:25 +08:00
|
|
|
// We cannot recolor a physical register.
|
2019-08-02 07:27:28 +08:00
|
|
|
if (Register::isPhysicalRegister(Reg))
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
continue;
|
|
|
|
|
RegAlloc: Allow targets to split register allocation
AMDGPU normally spills SGPRs to VGPRs. Previously, since all register
classes are handled at the same time, this was problematic. We don't
know ahead of time how many registers will be needed to be reserved to
handle the spilling. If no VGPRs were left for spilling, we would have
to try to spill to memory. If the spilled SGPRs were required for exec
mask manipulation, it is highly problematic because the lanes active
at the point of spill are not necessarily the same as at the restore
point.
Avoid this problem by fully allocating SGPRs in a separate regalloc
run from VGPRs. This way we know the exact number of VGPRs needed, and
can reserve them for a second run. This fixes the most serious
issues, but it is still possible using inline asm to make all VGPRs
unavailable. Start erroring in the case where we ever would require
memory for an SGPR spill.
This is implemented by giving each regalloc pass a callback which
reports if a register class should be handled or not. A few passes
need some small changes to deal with leftover virtual registers.
In the AMDGPU implementation, a new pass is introduced to take the
place of PrologEpilogInserter for SGPR spills emitted during the first
run.
One disadvantage of this is currently StackSlotColoring is no longer
used for SGPR spills. It would need to be run again, which will
require more work.
Error if the standard -regalloc option is used. Introduce new separate
-sgpr-regalloc and -vgpr-regalloc flags, so the two runs can be
controlled individually. PBQB is not currently supported, so this also
prevents using the unhandled allocator.
2018-09-27 07:36:28 +08:00
|
|
|
// This may be a skipped class
|
|
|
|
if (!VRM->hasPhys(Reg)) {
|
|
|
|
assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
|
|
|
|
"We have an unallocated variable which should have been handled");
|
|
|
|
continue;
|
|
|
|
}
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
|
|
|
|
// Get the live interval mapped with this virtual register to be able
|
|
|
|
// to check for the interference with the new color.
|
|
|
|
LiveInterval &LI = LIS->getInterval(Reg);
|
2020-10-23 01:30:30 +08:00
|
|
|
MCRegister CurrPhys = VRM->getPhys(Reg);
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// Check that the new color matches the register class constraints and
|
|
|
|
// that it is free for this live range.
|
|
|
|
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
|
|
|
|
Matrix->checkInterference(LI, PhysReg)))
|
|
|
|
continue;
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
|
|
|
|
<< ") is recolorable.\n");
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
|
|
|
|
// Gather the hint info.
|
|
|
|
Info.clear();
|
|
|
|
collectHintInfo(Reg, Info);
|
|
|
|
// Check if recoloring the live-range will increase the cost of the
|
|
|
|
// non-identity copies.
|
|
|
|
if (CurrPhys != PhysReg) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Checking profitability:\n");
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evicts another variable. Indeed, if no eviction
happens, it is pointless to look for recoloring opportunities, as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple, unlike the last chance recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it; otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater than
or equal to b), then it is profitable and the recoloring happens.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation works, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c (assuming b and d were
assigned to something different than a),
we end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
It is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are also a few regressions that
come from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
|
|
|
|
BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
|
|
|
|
<< "\nNew Cost: " << NewCopiesCost.getFrequency()
|
|
|
|
<< '\n');
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
if (OldCopiesCost < NewCopiesCost) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// At this point, the cost is either cheaper or equal. If it is
|
|
|
|
// equal, we consider this is profitable because it may expose
|
|
|
|
// more recoloring opportunities.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "=> Profitable.\n");
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// Recolor the live-range.
|
|
|
|
Matrix->unassign(LI);
|
|
|
|
Matrix->assign(LI, PhysReg);
|
|
|
|
}
|
|
|
|
// Push all copy-related live-ranges to keep reconciling the broken
|
|
|
|
// hints.
|
|
|
|
for (const HintInfo &HI : Info) {
|
|
|
|
if (Visited.insert(HI.Reg).second)
|
|
|
|
RecoloringCandidates.push_back(HI.Reg);
|
|
|
|
}
|
|
|
|
} while (!RecoloringCandidates.empty());
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Try to recolor broken hints.
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
/// Broken hints may be repaired by recoloring when an evicted variable
|
|
|
|
/// freed up a register for a larger live-range.
|
|
|
|
/// Consider the following example:
|
|
|
|
/// BB1:
|
|
|
|
/// a =
|
|
|
|
/// b =
|
|
|
|
/// BB2:
|
|
|
|
/// ...
|
|
|
|
/// = b
|
|
|
|
/// = a
|
|
|
|
/// Let us assume b gets split:
|
|
|
|
/// BB1:
|
|
|
|
/// a =
|
|
|
|
/// b =
|
|
|
|
/// BB2:
|
|
|
|
/// c = b
|
|
|
|
/// ...
|
|
|
|
/// d = c
|
|
|
|
/// = d
|
|
|
|
/// = a
|
|
|
|
/// Because of how the allocation works, b, c, and d may be assigned different
|
|
|
|
/// colors. Now, if a gets evicted later:
|
|
|
|
/// BB1:
|
|
|
|
/// a =
|
|
|
|
/// st a, SpillSlot
|
|
|
|
/// b =
|
|
|
|
/// BB2:
|
|
|
|
/// c = b
|
|
|
|
/// ...
|
|
|
|
/// d = c
|
|
|
|
/// = d
|
|
|
|
/// e = ld SpillSlot
|
|
|
|
/// = e
|
|
|
|
/// It is likely that we can assign the same register for b, c, and d,
|
|
|
|
/// getting rid of 2 copies.
|
|
|
|
void RAGreedy::tryHintsRecoloring() {
|
2022-02-04 01:07:42 +08:00
|
|
|
for (const LiveInterval *LI : SetOfBrokenHints) {
|
2020-09-16 05:54:38 +08:00
|
|
|
assert(Register::isVirtualRegister(LI->reg()) &&
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
"Recoloring is possible only for virtual registers");
|
|
|
|
// Some dead defs may be around (e.g., because of debug uses).
|
|
|
|
// Ignore those.
|
2020-09-16 05:54:38 +08:00
|
|
|
if (!VRM->hasPhys(LI->reg()))
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
continue;
|
|
|
|
tryHintRecoloring(*LI);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-04 01:07:42 +08:00
|
|
|
MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
|
2020-10-10 01:04:29 +08:00
|
|
|
SmallVectorImpl<Register> &NewVRegs,
|
|
|
|
SmallVirtRegSet &FixedRegisters,
|
2022-03-18 01:12:36 +08:00
|
|
|
RecoloringStack &RecolorStack,
|
2020-10-10 01:04:29 +08:00
|
|
|
unsigned Depth) {
|
2020-12-23 13:21:13 +08:00
|
|
|
uint8_t CostPerUseLimit = uint8_t(~0u);
|
2010-12-23 06:01:30 +08:00
|
|
|
// First try assigning a free register.
|
2020-09-29 07:41:28 +08:00
|
|
|
auto Order =
|
|
|
|
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
|
2020-11-03 12:07:58 +08:00
|
|
|
if (MCRegister PhysReg =
|
|
|
|
tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
|
2021-03-23 13:13:19 +08:00
|
|
|
// If VirtReg got an assignment, the eviction info is no longer relevant.
|
2020-09-16 05:54:38 +08:00
|
|
|
LastEvicted.clearEvicteeInfo(VirtReg.reg());
|
2014-03-28 05:21:57 +08:00
|
|
|
// When NewVRegs is not empty, we may have made decisions such as evicting
|
|
|
|
// a virtual register, go with the earlier decisions and use the physical
|
|
|
|
// register.
|
2021-12-14 14:49:57 +08:00
|
|
|
if (CSRCost.getFrequency() &&
|
|
|
|
EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
|
2020-10-10 01:04:29 +08:00
|
|
|
MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
|
|
|
|
CostPerUseLimit, NewVRegs);
|
2014-03-28 05:21:57 +08:00
|
|
|
if (CSRReg || !NewVRegs.empty())
|
|
|
|
// Return now if we decide to use a CSR or create new vregs due to
|
|
|
|
// pre-splitting.
|
|
|
|
return CSRReg;
|
2014-03-25 08:16:25 +08:00
|
|
|
} else
|
|
|
|
return PhysReg;
|
|
|
|
}
|
2010-12-10 02:15:21 +08:00
|
|
|
|
2021-12-01 08:30:16 +08:00
|
|
|
LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
|
2021-12-01 08:30:16 +08:00
|
|
|
<< ExtraInfo->getCascade(VirtReg.reg()) << '\n');
|
2011-05-26 07:58:36 +08:00
|
|
|
|
2011-06-02 02:45:02 +08:00
|
|
|
// Try to evict a less worthy live range, but only for ranges from the primary
|
2011-07-25 23:25:41 +08:00
|
|
|
// queue. The RS_Split ranges already failed to do this, and they should not
|
2011-06-02 02:45:02 +08:00
|
|
|
// get a second chance until they have been split.
|
2011-07-25 23:25:41 +08:00
|
|
|
if (Stage != RS_Split)
|
2020-06-30 23:57:24 +08:00
|
|
|
if (Register PhysReg =
|
2019-02-14 06:56:43 +08:00
|
|
|
tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
|
|
|
|
FixedRegisters)) {
|
2020-09-16 05:54:38 +08:00
|
|
|
Register Hint = MRI->getSimpleHint(VirtReg.reg());
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
// If VirtReg has a hint and that hint is broken record this
|
|
|
|
// virtual register as a recoloring candidate for broken hint.
|
|
|
|
// Indeed, since we evicted a variable in its neighborhood it is
|
|
|
|
// likely we can at least partially recolor some of the
|
|
|
|
// copy-related live-ranges.
|
|
|
|
if (Hint && Hint != PhysReg)
|
|
|
|
SetOfBrokenHints.insert(&VirtReg);
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for the X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
// If VirtReg evicted someone, the eviction info for it as an evictee is
|
2021-03-23 13:13:19 +08:00
|
|
|
// no longer relevant.
|
2020-09-16 05:54:38 +08:00
|
|
|
LastEvicted.clearEvicteeInfo(VirtReg.reg());
|
2011-06-02 02:45:02 +08:00
|
|
|
return PhysReg;
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater than
or equal to b), then it is profitable and the recoloring happens.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation works, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c (assuming b and d were
assigned to something different than a),
we end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
It is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are also a few regressions that
come from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
}
|
2010-12-10 02:15:21 +08:00
|
|
|
|
2016-09-17 06:00:42 +08:00
|
|
|
assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
|
2011-01-20 06:11:48 +08:00
|
|
|
|
2011-02-25 07:21:36 +08:00
|
|
|
// The first time we see a live range, don't try to split or spill.
|
|
|
|
// Wait until the second time, when all smaller ranges have been allocated.
|
|
|
|
// This gives a better picture of the interference to split around.
|
2011-07-25 23:25:41 +08:00
|
|
|
if (Stage < RS_Split) {
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(VirtReg, RS_Split);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "wait for second round\n");
|
2020-09-16 05:54:38 +08:00
|
|
|
NewVRegs.push_back(VirtReg.reg());
|
2011-02-25 07:21:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-10-11 09:04:36 +08:00
|
|
|
if (Stage < RS_Spill) {
|
|
|
|
// Try splitting VirtReg or interferences.
|
|
|
|
unsigned NewVRegSizeBefore = NewVRegs.size();
|
2020-06-30 23:57:24 +08:00
|
|
|
Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
|
2020-03-12 23:15:04 +08:00
|
|
|
// If VirtReg got split, the eviction info is no longer relevant.
|
2020-09-16 05:54:38 +08:00
|
|
|
LastEvicted.clearEvicteeInfo(VirtReg.reg());
|
2016-10-11 09:04:36 +08:00
|
|
|
return PhysReg;
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" and "by stack" intervals. A local interval is created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control whether we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
}
|
2016-10-11 09:04:36 +08:00
|
|
|
}
|
|
|
|
|
2011-05-07 05:58:30 +08:00
|
|
|
// If we couldn't allocate a register from spilling, there is probably some
|
2017-06-30 02:03:28 +08:00
|
|
|
// invalid inline assembly. The base class will report it.
|
2022-03-18 01:12:36 +08:00
|
|
|
if (Stage >= RS_Done || !VirtReg.isSpillable()) {
|
2014-02-06 06:13:59 +08:00
|
|
|
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
|
2022-03-18 01:12:36 +08:00
|
|
|
RecolorStack, Depth);
|
|
|
|
}
|
2011-03-02 05:10:07 +08:00
|
|
|
|
2010-12-23 06:01:30 +08:00
|
|
|
// Finally spill VirtReg itself.
|
2020-09-18 05:47:12 +08:00
|
|
|
if ((EnableDeferredSpilling ||
|
|
|
|
TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->getStage(VirtReg) < RS_Memory) {
|
2015-07-18 07:04:06 +08:00
|
|
|
// TODO: This is experimental and in particular, we do not model
|
|
|
|
// the live range splitting done by spilling correctly.
|
|
|
|
// We would need a deep integration with the spiller to do the
|
|
|
|
// right thing here. Anyway, that is still good for early testing.
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(VirtReg, RS_Memory);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
|
2020-09-16 05:54:38 +08:00
|
|
|
NewVRegs.push_back(VirtReg.reg());
|
2015-07-18 07:04:06 +08:00
|
|
|
} else {
|
2016-11-19 03:43:18 +08:00
|
|
|
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
|
|
|
|
TimerGroupDescription, TimePassesIsEnabled);
|
2016-04-13 11:08:27 +08:00
|
|
|
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
|
2015-07-18 07:04:06 +08:00
|
|
|
spiller().spill(LRE);
|
2021-12-01 08:30:16 +08:00
|
|
|
ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
|
2010-12-08 11:26:16 +08:00
|
|
|
|
[LDV][RAGreedy] Inform LiveDebugVariables about new VRegs added by InlineSpiller
Summary:
Make sure RAGreedy informs LiveDebugVariables about new VRegs
that are introduced at spill by InlineSpiller.
Consider this example
LDV: !"var" [48r;128r):0 Loc0=%2
48B %2 = ...
...
128B %7 = ADD %2, ...
If %2 is spilled the InlineSpiller will insert spill/reload
instructions and introduces some new vregs. So we get
48B %4 = ...
56B spill %4
...
120B reload %5
128B %3 = ADD %5, ...
In the past we did not inform LDV about this, and when reintroducing
DBG_VALUE instruction LDV still got information that "var" had the
location of the spilled register %2 for the interval [48r;128r).
The result was bad, since we mapped "var" to the spill slot even
before the spill happened:
%4 = ...
DBG_VALUE %spill.0, !"var"
spill %4 to %spill.0
...
reload %5
%3 = ADD %5, ...
This patch will inform LDV about the interval split introduced
due to spilling. So the location map in LDV will become
!"var" [48r;56r):1 [56r;120r):0 [120r;128r):2 Loc0=%2 Loc1=%4 Loc2=%5
And when inserting DBG_VALUE instructions we get
%4 = ...
DBG_VALUE %4, !"var"
spill %4 to %spill.0
DBG_VALUE %spill.0, !"var"
...
reload %5
DBG_VALUE %5, !"var"
%3 = ADD %5, ...
Fixes: https://bugs.llvm.org/show_bug.cgi?id=38899
Reviewers: jmorse, vsk, aprantl
Reviewed By: jmorse
Subscribers: dstenb, wuzish, MatzeB, qcolombet, nemanjai, hiraditya, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69584
2019-10-26 01:03:18 +08:00
|
|
|
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
|
|
|
|
// the new regs are kept in LDV (still mapping to the old register), until
|
|
|
|
// we rewrite spilled locations in LDV at a later stage.
|
2020-09-16 05:54:38 +08:00
|
|
|
DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
|
[LDV][RAGreedy] Inform LiveDebugVariables about new VRegs added by InlineSpiller
Summary:
Make sure RAGreedy informs LiveDebugVariables about new VRegs
that are introduced at spill by InlineSpiller.
Consider this example
LDV: !"var" [48r;128r):0 Loc0=%2
48B %2 = ...
...
128B %7 = ADD %2, ...
If %2 is spilled the InlineSpiller will insert spill/reload
instructions and introduces some new vregs. So we get
48B %4 = ...
56B spill %4
...
120B reload %5
128B %3 = ADD %5, ...
In the past we did not inform LDV about this, and when reintroducing
DBG_VALUE instruction LDV still got information that "var" had the
location of the spilled register %2 for the interval [48r;128r).
The result was bad, since we mapped "var" to the spill slot even
before the spill happened:
%4 = ...
DBG_VALUE %spill.0, !"var"
spill %4 to %spill.0
...
reload %5
%3 = ADD %5, ...
This patch will inform LDV about the interval split introduced
due to spilling. So the location map in LDV will become
!"var" [48r;56r):1 [56r;120r):0 [120r;128r):2 Loc0=%2 Loc1=%4 Loc2=%5
And when inserting DBG_VALUE instructions we get
%4 = ...
DBG_VALUE %4, !"var"
spill %4 to %spill.0
DBG_VALUE %spill.0, !"var"
...
reload %5
DBG_VALUE %5, !"var"
%3 = ADD %5, ...
Fixes: https://bugs.llvm.org/show_bug.cgi?id=38899
Reviewers: jmorse, vsk, aprantl
Reviewed By: jmorse
Subscribers: dstenb, wuzish, MatzeB, qcolombet, nemanjai, hiraditya, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69584
2019-10-26 01:03:18 +08:00
|
|
|
|
2015-07-18 07:04:06 +08:00
|
|
|
if (VerifyEnabled)
|
|
|
|
MF->verify(this, "After spilling");
|
|
|
|
}
|
2011-03-17 06:56:08 +08:00
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
// The live virtual register requesting allocation was spilled, so tell
|
|
|
|
// the caller not to allocate anything during this round.
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:27:37 +08:00
|
|
|
/// Stream the non-zero statistics held by this object into the remark \p R.
///
/// Every counter that is non-zero is appended as a named value followed by a
/// short description. Spills, folded spills, reloads, folded reloads and
/// copies are additionally followed by their accumulated cost; zero cost
/// folded reloads are reported as a count only.
void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
  using namespace ore;

  if (Spills)
    R << NV("NumSpills", Spills) << " spills "
      << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";

  if (FoldedSpills)
    R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "
      << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
      << " total folded spills cost ";

  if (Reloads)
    R << NV("NumReloads", Reloads) << " reloads "
      << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";

  if (FoldedReloads)
    R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "
      << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
      << " total folded reloads cost ";

  // Zero cost folded reloads carry no cost figure, only a count.
  if (ZeroCostFoldedReloads)
    R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
      << " zero cost folded reloads ";

  if (Copies)
    R << NV("NumVRCopies", Copies) << " virtual registers copies "
      << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
}
|
2017-01-26 07:20:33 +08:00
|
|
|
|
2021-04-09 17:31:05 +08:00
|
|
|
/// Gather spill, reload and copy statistics for the instructions of \p MBB.
///
/// The instructions of the block are classified as virtual-register copies,
/// plain reloads from / spills to a spill slot, or instructions with folded
/// spill-slot accesses. Once the counts are known, each cost field is set to
/// the matching count multiplied by the frequency of \p MBB relative to the
/// entry block.
///
/// \returns the statistics collected for this block only.
RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  // True when the memory operand's fixed-stack pseudo value names a spill slot.
  auto AccessesSpillSlot = [&MFI](const MachineMemOperand *MMO) {
    const auto *StackValue =
        cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
    return MFI.isSpillSlotObjectIndex(StackValue->getFrameIndex());
  };

  // True for PATCHPOINT, STACKMAP and STATEPOINT instructions.
  auto IsPatchpointLike = [](const MachineInstr &Instr) {
    switch (Instr.getOpcode()) {
    case TargetOpcode::PATCHPOINT:
    case TargetOpcode::STACKMAP:
    case TargetOpcode::STATEPOINT:
      return true;
    default:
      return false;
    }
  };

  RAGreedyStats Stats;
  int FrameIdx;
  for (MachineInstr &MI : MBB) {
    // Copies: only count those whose source and destination are both virtual
    // registers. Any copy is otherwise ignored by the remaining checks.
    if (MI.isCopy()) {
      const MachineOperand &DstOp = MI.getOperand(0);
      const MachineOperand &SrcOp = MI.getOperand(1);
      const bool BothVirtual = DstOp.isReg() && SrcOp.isReg() &&
                               DstOp.getReg().isVirtual() &&
                               SrcOp.getReg().isVirtual();
      if (BothVirtual)
        ++Stats.Copies;
      continue;
    }

    // Plain reload from a spill slot.
    if (TII->isLoadFromStackSlot(MI, FrameIdx) &&
        MFI.isSpillSlotObjectIndex(FrameIdx)) {
      ++Stats.Reloads;
      continue;
    }

    // Plain store to a spill slot.
    if (TII->isStoreToStackSlot(MI, FrameIdx) &&
        MFI.isSpillSlotObjectIndex(FrameIdx)) {
      ++Stats.Spills;
      continue;
    }

    // Folded reloads.
    SmallVector<const MachineMemOperand *, 2> MemOps;
    if (TII->hasLoadFromStackSlot(MI, MemOps) &&
        llvm::any_of(MemOps, AccessesSpillSlot)) {
      if (IsPatchpointLike(MI)) {
        // For statepoint there may be folded and zero cost folded stack
        // reloads: frame-index operands inside the range reported by the
        // target are counted as folded reloads, the others as zero cost.
        const std::pair<unsigned, unsigned> UnfoldableRange =
            TII->getPatchpointUnfoldableRange(MI);
        SmallSet<unsigned, 16> CostlySlots;
        SmallSet<unsigned, 16> FreeSlots;
        for (unsigned OpNo = 0, NumOps = MI.getNumOperands(); OpNo != NumOps;
             ++OpNo) {
          const MachineOperand &MO = MI.getOperand(OpNo);
          if (!(MO.isFI() && MFI.isSpillSlotObjectIndex(MO.getIndex())))
            continue;
          const bool InRange = UnfoldableRange.first <= OpNo &&
                               OpNo < UnfoldableRange.second;
          (InRange ? CostlySlots : FreeSlots).insert(MO.getIndex());
        }
        // If stack slot is used in folded reload it is not zero cost then.
        for (unsigned Slot : CostlySlots)
          FreeSlots.erase(Slot);
        Stats.FoldedReloads += CostlySlots.size();
        Stats.ZeroCostFoldedReloads += FreeSlots.size();
      } else {
        Stats.FoldedReloads += MemOps.size();
      }
      continue;
    }

    // Folded spills. The access list is reset first because the query above
    // may have filled it.
    MemOps.clear();
    if (TII->hasStoreToStackSlot(MI, MemOps) &&
        llvm::any_of(MemOps, AccessesSpillSlot))
      Stats.FoldedSpills += MemOps.size();
  }

  // Set cost of collected statistic by multiplication to relative frequency of
  // this basic block.
  const float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}
|
|
|
|
|
2021-04-09 17:31:05 +08:00
|
|
|
/// Compute the statistics for loop \p L, emit a missed-optimization remark
/// for it when they are not empty, and return them.
///
/// The result is the sum of the statistics of every sub-loop (obtained by
/// recursing, which also emits their remarks) and of the blocks whose
/// innermost loop is \p L itself.
RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
  RAGreedyStats Stats;

  // Sum up the spill and reloads in subloops.
  for (MachineLoop *Inner : *L)
    Stats.add(reportStats(Inner));

  // Handle blocks that were not included in subloops.
  for (MachineBasicBlock *Block : L->getBlocks()) {
    if (Loops->getLoopFor(Block) != L)
      continue;
    Stats.add(computeStats(*Block));
  }

  // Nothing to report for this loop.
  if (Stats.isEmpty())
    return Stats;

  using namespace ore;

  ORE->emit([&]() {
    MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
                                      L->getStartLoc(), L->getHeader());
    Stats.report(R);
    R << "generated in loop";
    return R;
  });
  return Stats;
}
|
|
|
|
|
2021-04-09 17:31:05 +08:00
|
|
|
void RAGreedy::reportStats() {
|
2021-04-06 22:32:02 +08:00
|
|
|
if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
|
|
|
|
return;
|
2021-04-08 15:55:07 +08:00
|
|
|
RAGreedyStats Stats;
|
2021-04-08 15:27:37 +08:00
|
|
|
for (MachineLoop *L : *Loops)
|
2021-04-09 17:31:05 +08:00
|
|
|
Stats.add(reportStats(L));
|
2021-04-08 15:55:07 +08:00
|
|
|
// Process non-loop blocks.
|
|
|
|
for (MachineBasicBlock &MBB : *MF)
|
|
|
|
if (!Loops->getLoopFor(&MBB))
|
2021-04-09 17:31:05 +08:00
|
|
|
Stats.add(computeStats(MBB));
|
2021-04-08 15:55:07 +08:00
|
|
|
if (!Stats.isEmpty()) {
|
|
|
|
using namespace ore;
|
|
|
|
|
|
|
|
ORE->emit([&]() {
|
2021-04-09 16:39:55 +08:00
|
|
|
DebugLoc Loc;
|
|
|
|
if (auto *SP = MF->getFunction().getSubprogram())
|
|
|
|
Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
|
2021-04-09 17:31:05 +08:00
|
|
|
MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
|
2021-04-08 15:55:07 +08:00
|
|
|
&MF->front());
|
|
|
|
Stats.report(R);
|
|
|
|
R << "generated in function";
|
|
|
|
return R;
|
|
|
|
});
|
|
|
|
}
|
2021-04-06 22:32:02 +08:00
|
|
|
}
|
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
|
|
|
|
<< "********** Function: " << mf.getName() << '\n');
|
2010-12-08 11:26:16 +08:00
|
|
|
|
|
|
|
MF = &mf;
|
2014-10-14 15:22:00 +08:00
|
|
|
TRI = MF->getSubtarget().getRegisterInfo();
|
|
|
|
TII = MF->getSubtarget().getInstrInfo();
|
2014-01-03 06:47:22 +08:00
|
|
|
RCI.runOnMachineFunction(mf);
|
2014-07-03 02:32:04 +08:00
|
|
|
|
2010-12-18 07:16:35 +08:00
|
|
|
if (VerifyEnabled)
|
2010-12-18 08:06:56 +08:00
|
|
|
MF->verify(this, "Before greedy register allocator");
|
2010-12-18 07:16:35 +08:00
|
|
|
|
2012-06-21 06:52:29 +08:00
|
|
|
RegAllocBase::init(getAnalysis<VirtRegMap>(),
|
|
|
|
getAnalysis<LiveIntervals>(),
|
|
|
|
getAnalysis<LiveRegMatrix>());
|
2011-01-19 05:13:27 +08:00
|
|
|
Indexes = &getAnalysis<SlotIndexes>();
|
2013-06-18 03:00:36 +08:00
|
|
|
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
|
2010-12-18 07:16:32 +08:00
|
|
|
DomTree = &getAnalysis<MachineDominatorTree>();
|
2017-01-26 07:20:33 +08:00
|
|
|
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
|
2010-12-16 07:46:13 +08:00
|
|
|
Loops = &getAnalysis<MachineLoopInfo>();
|
2011-01-19 05:13:27 +08:00
|
|
|
Bundles = &getAnalysis<EdgeBundles>();
|
|
|
|
SpillPlacer = &getAnalysis<SpillPlacement>();
|
2011-05-07 02:00:02 +08:00
|
|
|
DebugVars = &getAnalysis<LiveDebugVariables>();
|
2016-07-09 05:08:09 +08:00
|
|
|
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
2011-01-19 05:13:27 +08:00
|
|
|
|
2014-04-09 03:18:56 +08:00
|
|
|
initializeCSRCost();
|
|
|
|
|
2020-12-23 13:21:13 +08:00
|
|
|
RegCosts = TRI->getRegisterCosts(*MF);
|
|
|
|
|
2022-02-01 06:01:43 +08:00
|
|
|
ExtraInfo.emplace();
|
|
|
|
EvictAdvisor =
|
|
|
|
getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
|
|
|
|
|
2020-10-10 07:38:42 +08:00
|
|
|
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
|
2021-02-18 05:32:26 +08:00
|
|
|
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
|
2020-09-30 00:09:25 +08:00
|
|
|
|
|
|
|
VRAI->calculateSpillWeightsAndHints();
|
2013-11-11 01:46:31 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(LIS->dump());
|
2013-07-25 15:26:26 +08:00
|
|
|
|
2011-02-19 08:53:42 +08:00
|
|
|
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
|
2021-02-18 05:32:26 +08:00
|
|
|
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
|
2022-02-01 06:01:43 +08:00
|
|
|
|
2012-06-21 06:52:26 +08:00
|
|
|
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
|
2011-07-27 07:41:46 +08:00
|
|
|
GlobalCand.resize(32); // This will grow as needed.
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
SetOfBrokenHints.clear();
|
Add logic to greedy reg alloc to avoid bad eviction chains
This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810
This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload
Such sequences are created in 2 scenarios:
Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills
As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).
Differential Revision: https://reviews.llvm.org/D35816
Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
2017-10-23 01:59:38 +08:00
|
|
|
LastEvicted.clear();
|
2010-12-16 07:46:13 +08:00
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
allocatePhysRegs();
|
[RegAllocGreedy] Introduce a late pass to repair broken hints.
A broken hint is a copy where both ends are assigned different colors. When a
variable gets evicted in the neighborhood of such copies, it is likely we can
reconcile some of them.
** Context **
Copies are inserted during the register allocation via splitting. These split
points are required to relax the constraints on the allocation problem. When
such a point is inserted, both ends of the copy would not share the same color
with respect to the current allocation problem. When variables get evicted,
the allocation problem becomes different and some split point may not be
required anymore. However, the related variables may already have been colored.
This usually shows up in the assembly with pattern like this:
def A
...
save A to B
def A
use A
restore A from B
...
use B
Whereas we could simply have done:
def B
...
def A
use A
...
use B
** Proposed Solution **
A variable having a broken hint is marked for late recoloring if and only if
selecting a register for it evict another variable. Indeed, if no eviction
happens this is pointless to look for recoloring opportunities as it means the
situation was the same as the initial allocation problem where we had to break
the hint.
Finally, when everything has been allocated, we look for recoloring
opportunities for all the identified candidates.
The recoloring is performed very late to rely on accurate copy cost (all
involved variables are allocated).
The recoloring is simple unlike the last change recoloring. It propagates the
color of the broken hint to all its copy-related variables. If the color is
available for them, the recoloring uses it, otherwise it gives up on that hint
even if a more complex coloring would have worked.
The recoloring happens only if it is profitable. The profitability is evaluated
using the expected frequency of the copies of the currently recolored variable
with a) its current color and b) with the target color. If a) is greater or
equal than b), then it is profitable and the recoloring happen.
** Example **
Consider the following example:
BB1:
a =
b =
BB2:
...
= b
= a
Let us assume b gets split:
BB1:
a =
b =
BB2:
c = b
...
d = c
= d
= a
Because of how the allocation work, b, c, and d may be assigned different
colors. Now, if a gets evicted to make room for c, assuming b and d were
assigned to something different than a.
We end up with:
BB1:
a =
st a, SpillSlot
b =
BB2:
c = b
...
d = c
= d
e = ld SpillSlot
= e
This is likely that we can assign the same register for b, c, and d,
getting rid of 2 copies.
** Performances **
Both ARM64 and x86_64 show performance improvements of up to 3% for the
llvm-testsuite + externals with Os and O3. There are a few regressions too that
comes from the (in)accuracy of the block frequency estimate.
<rdar://problem/18312047>
llvm-svn: 225422
2015-01-08 09:16:39 +08:00
|
|
|
tryHintsRecoloring();
|
2021-02-19 01:08:56 +08:00
|
|
|
|
|
|
|
if (VerifyEnabled)
|
|
|
|
MF->verify(this, "Before post optimization");
|
2016-04-13 11:08:27 +08:00
|
|
|
postOptimization();
|
2021-04-09 17:31:05 +08:00
|
|
|
reportStats();
|
2016-04-13 11:08:27 +08:00
|
|
|
|
2010-12-08 11:26:16 +08:00
|
|
|
releaseMemory();
|
|
|
|
return true;
|
|
|
|
}
|