2017-09-22 07:20:16 +08:00
|
|
|
//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
|
2011-01-06 09:21:53 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2011-01-06 09:21:53 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This analysis computes the optimal spill code placement between basic blocks.
|
|
|
|
//
|
|
|
|
// The runOnMachineFunction() method only precomputes some profiling information
|
2011-04-07 03:13:57 +08:00
|
|
|
// about the CFG. The real work is done by prepare(), addConstraints(), and
|
|
|
|
// finish() which are called by the register allocator.
|
2011-01-06 09:21:53 +08:00
|
|
|
//
|
|
|
|
// Given a variable that is live across multiple basic blocks, and given
|
|
|
|
// constraints on the basic blocks where the variable is live, determine which
|
|
|
|
// edge bundles should have the variable in a register and which edge bundles
|
|
|
|
// should have the variable in a stack slot.
|
|
|
|
//
|
|
|
|
// The returned bit vector can be used to place optimal spill code at basic
|
|
|
|
// block entries and exits. Spill code placement inside a basic block is not
|
|
|
|
// considered.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
|
|
|
|
#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
|
2011-01-06 09:21:53 +08:00
|
|
|
|
2011-04-07 03:13:57 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2011-03-04 08:58:40 +08:00
|
|
|
#include "llvm/ADT/SmallVector.h"
|
Reapply r263460: [SpillPlacement] Fix a quadratic behavior in spill placement.
Using Chandler's words from r265331:
This commit was greatly exacerbating PR17409 and effectively regressed
build time for lot of (very large) code when compiled with ASan or MSan.
PR17409 is fixed by r269249, so this is fine to reapply r263460.
Original commit message:
The bad behavior happens when we have a function with a long linear
chain of basic blocks, and have a live range spanning most of this
chain, but with very few uses.
Let say we have only 2 uses.
The Hopfield network is only seeded with two active blocks where the
uses are, and each iteration of the outer loop in
`RAGreedy::growRegion()` only adds two new nodes to the network due to
the completely linear shape of the CFG. Meanwhile,
`SpillPlacer->iterate()` visits the whole set of discovered nodes, which
adds up to a quadratic algorithm.
This is an historical accident effect from r129188.
When the Hopfield network is expanding, most of the action is happening
on the frontier where new nodes are being added. The internal nodes in
the network are not likely to be flip-flopping much, or they will at
least settle down very quickly. This means that while
`SpillPlacer->iterate()` is recomputing all the nodes in the network, it
is probably only the two frontier nodes that are changing their output.
Instead of recomputing the whole network on each iteration, we can
maintain a SparseSet of nodes that need to be updated:
- `SpillPlacement::activate()` adds the node to the todo list.
- When a node changes value (i.e., `update()` returns true), its
neighbors are added to the todo list.
- `SpillPlacement::iterate()` only updates the nodes in the list.
The result of Hopfield iterations is not necessarily exact. It should
converge to a local minimum, but there is no guarantee that it will find
a global minimum. It is possible that updating nodes in a different
order will cause us to switch to a different local minimum. In other
words, this is not NFC, but although I saw a few runtime improvements
and regressions when I benchmarked this change, those were side effects
and actually the performance change is in the noise as expected.
Huge thanks to Jakob Stoklund Olesen <stoklund@2pi.dk> for his
feedbacks, guidance and time for the review.
llvm-svn: 270149
2016-05-20 06:40:37 +08:00
|
|
|
#include "llvm/ADT/SparseSet.h"
|
2011-01-06 09:21:53 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2013-07-17 02:26:15 +08:00
|
|
|
#include "llvm/Support/BlockFrequency.h"
|
2011-01-06 09:21:53 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
// Forward declarations. These types are only named through pointers or
// references in this header, so their full definitions are not needed here.
class BitVector;
class EdgeBundles;
class MachineBlockFrequencyInfo;
class MachineFunction;
class MachineLoopInfo;
class raw_ostream;
|
2011-01-06 09:21:53 +08:00
|
|
|
|
2014-08-11 10:21:30 +08:00
|
|
|
class SpillPlacement : public MachineFunctionPass {
|
2011-01-06 09:21:53 +08:00
|
|
|
struct Node;
|
|
|
|
const MachineFunction *MF;
|
|
|
|
const EdgeBundles *bundles;
|
|
|
|
const MachineLoopInfo *loops;
|
2013-12-14 08:25:47 +08:00
|
|
|
const MachineBlockFrequencyInfo *MBFI;
|
2017-09-22 07:20:16 +08:00
|
|
|
Node *nodes = nullptr;
|
2011-01-06 09:21:53 +08:00
|
|
|
|
2011-04-07 03:13:57 +08:00
|
|
|
// Nodes that are active in the current computation. Owned by the prepare()
|
2011-01-06 09:21:53 +08:00
|
|
|
// caller.
|
|
|
|
BitVector *ActiveNodes;
|
|
|
|
|
2011-04-09 10:59:09 +08:00
|
|
|
// Nodes with active links. Populated by scanActiveBundles.
|
|
|
|
SmallVector<unsigned, 8> Linked;
|
|
|
|
|
|
|
|
// Nodes that went positive during the last call to scanActiveBundles or
|
|
|
|
// iterate.
|
|
|
|
SmallVector<unsigned, 8> RecentPositive;
|
2011-04-07 03:14:00 +08:00
|
|
|
|
2011-03-04 08:58:40 +08:00
|
|
|
// Block frequencies are computed once. Indexed by block number.
|
2014-08-11 10:21:30 +08:00
|
|
|
SmallVector<BlockFrequency, 8> BlockFrequencies;
|
2011-03-04 08:58:40 +08:00
|
|
|
|
2014-10-03 06:23:14 +08:00
|
|
|
/// Decision threshold. A node gets the output value 0 if the weighted sum of
|
|
|
|
/// its inputs falls in the open interval (-Threshold;Threshold).
|
|
|
|
BlockFrequency Threshold;
|
|
|
|
|
Reapply r263460: [SpillPlacement] Fix a quadratic behavior in spill placement.
Using Chandler's words from r265331:
This commit was greatly exacerbating PR17409 and effectively regressed
build time for lot of (very large) code when compiled with ASan or MSan.
PR17409 is fixed by r269249, so this is fine to reapply r263460.
Original commit message:
The bad behavior happens when we have a function with a long linear
chain of basic blocks, and have a live range spanning most of this
chain, but with very few uses.
Let say we have only 2 uses.
The Hopfield network is only seeded with two active blocks where the
uses are, and each iteration of the outer loop in
`RAGreedy::growRegion()` only adds two new nodes to the network due to
the completely linear shape of the CFG. Meanwhile,
`SpillPlacer->iterate()` visits the whole set of discovered nodes, which
adds up to a quadratic algorithm.
This is an historical accident effect from r129188.
When the Hopfield network is expanding, most of the action is happening
on the frontier where new nodes are being added. The internal nodes in
the network are not likely to be flip-flopping much, or they will at
least settle down very quickly. This means that while
`SpillPlacer->iterate()` is recomputing all the nodes in the network, it
is probably only the two frontier nodes that are changing their output.
Instead of recomputing the whole network on each iteration, we can
maintain a SparseSet of nodes that need to be updated:
- `SpillPlacement::activate()` adds the node to the todo list.
- When a node changes value (i.e., `update()` returns true), its
neighbors are added to the todo list.
- `SpillPlacement::iterate()` only updates the nodes in the list.
The result of Hopfield iterations is not necessarily exact. It should
converge to a local minimum, but there is no guarantee that it will find
a global minimum. It is possible that updating nodes in a different
order will cause us to switch to a different local minimum. In other
words, this is not NFC, but although I saw a few runtime improvements
and regressions when I benchmarked this change, those were side effects
and actually the performance change is in the noise as expected.
Huge thanks to Jakob Stoklund Olesen <stoklund@2pi.dk> for his
feedbacks, guidance and time for the review.
llvm-svn: 270149
2016-05-20 06:40:37 +08:00
|
|
|
/// List of nodes that need to be updated in ::iterate.
|
|
|
|
SparseSet<unsigned> TodoList;
|
|
|
|
|
2011-01-06 09:21:53 +08:00
|
|
|
public:
|
|
|
|
static char ID; // Pass identification, replacement for typeid.
|
|
|
|
|
2017-09-22 07:20:16 +08:00
|
|
|
SpillPlacement() : MachineFunctionPass(ID) {}
|
2015-04-11 10:11:45 +08:00
|
|
|
~SpillPlacement() override { releaseMemory(); }
|
2011-01-06 09:21:53 +08:00
|
|
|
|
|
|
|
/// BorderConstraint - A basic block has separate constraints for entry and
|
|
|
|
/// exit.
|
|
|
|
enum BorderConstraint {
|
|
|
|
DontCare, ///< Block doesn't care / variable not live.
|
|
|
|
PrefReg, ///< Block entry/exit prefers a register.
|
|
|
|
PrefSpill, ///< Block entry/exit prefers a stack slot.
|
2011-08-03 05:53:03 +08:00
|
|
|
PrefBoth, ///< Block entry prefers both register and stack.
|
2011-01-06 09:21:53 +08:00
|
|
|
MustSpill ///< A register is impossible, variable must be spilled.
|
|
|
|
};
|
|
|
|
|
|
|
|
/// BlockConstraint - Entry and exit constraints for a basic block.
|
|
|
|
struct BlockConstraint {
|
|
|
|
unsigned Number; ///< Basic block number (from MBB::getNumber()).
|
|
|
|
BorderConstraint Entry : 8; ///< Constraint on block entry.
|
|
|
|
BorderConstraint Exit : 8; ///< Constraint on block exit.
|
2011-08-03 05:53:03 +08:00
|
|
|
|
|
|
|
/// True when this block changes the value of the live range. This means
|
|
|
|
/// the block has a non-PHI def. When this is false, a live-in value on
|
|
|
|
/// the stack can be live-out on the stack without inserting a spill.
|
|
|
|
bool ChangesValue;
|
2021-02-19 00:50:00 +08:00
|
|
|
|
|
|
|
void print(raw_ostream &OS) const;
|
|
|
|
void dump() const;
|
2011-01-06 09:21:53 +08:00
|
|
|
};
|
|
|
|
|
2011-04-07 03:13:57 +08:00
|
|
|
/// prepare - Reset state and prepare for a new spill placement computation.
|
2011-01-06 09:21:53 +08:00
|
|
|
/// @param RegBundles Bit vector to receive the edge bundles where the
|
|
|
|
/// variable should be kept in a register. Each bit
|
|
|
|
/// corresponds to an edge bundle, a set bit means the
|
|
|
|
/// variable should be kept in a register through the
|
|
|
|
/// bundle. A clear bit means the variable should be
|
2011-04-07 03:13:57 +08:00
|
|
|
/// spilled. This vector is retained.
|
|
|
|
void prepare(BitVector &RegBundles);
|
|
|
|
|
|
|
|
/// addConstraints - Add constraints and biases. This method may be called
|
|
|
|
/// more than once to accumulate constraints.
|
|
|
|
/// @param LiveBlocks Constraints for blocks that have the variable live in or
|
2011-04-08 01:27:46 +08:00
|
|
|
/// live out.
|
2011-04-07 03:13:57 +08:00
|
|
|
void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
|
|
|
|
|
2011-08-03 05:53:03 +08:00
|
|
|
/// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
|
|
|
|
/// equivalent to calling addConstraint with identical BlockConstraints with
|
|
|
|
/// Entry = Exit = PrefSpill, and ChangesValue = false.
|
|
|
|
///
|
2011-07-23 11:10:19 +08:00
|
|
|
/// @param Blocks Array of block numbers that prefer to spill in and out.
|
2011-08-04 07:09:38 +08:00
|
|
|
/// @param Strong When true, double the negative bias for these blocks.
|
|
|
|
void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
|
2011-07-23 11:10:19 +08:00
|
|
|
|
2011-04-08 01:27:46 +08:00
|
|
|
/// addLinks - Add transparent blocks with the given numbers.
|
|
|
|
void addLinks(ArrayRef<unsigned> Links);
|
|
|
|
|
2011-04-09 10:59:09 +08:00
|
|
|
/// scanActiveBundles - Perform an initial scan of all bundles activated by
|
|
|
|
/// addConstraints and addLinks, updating their state. Add all the bundles
|
|
|
|
/// that now prefer a register to RecentPositive.
|
|
|
|
/// Prepare internal data structures for iterate.
|
|
|
|
/// Return true is there are any positive nodes.
|
|
|
|
bool scanActiveBundles();
|
|
|
|
|
|
|
|
/// iterate - Update the network iteratively until convergence, or new bundles
|
|
|
|
/// are found.
|
|
|
|
void iterate();
|
|
|
|
|
|
|
|
/// getRecentPositive - Return an array of bundles that became positive during
|
|
|
|
/// the previous call to scanActiveBundles or iterate.
|
|
|
|
ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
|
2011-04-07 03:14:00 +08:00
|
|
|
|
2011-04-07 03:13:57 +08:00
|
|
|
/// finish - Compute the optimal spill code placement given the
|
|
|
|
/// constraints. No MustSpill constraints will be violated, and the smallest
|
|
|
|
/// possible number of PrefX constraints will be violated, weighted by
|
|
|
|
/// expected execution frequencies.
|
|
|
|
/// The selected bundles are returned in the bitvector passed to prepare().
|
2011-01-06 09:21:53 +08:00
|
|
|
/// @return True if a perfect solution was found, allowing the variable to be
|
|
|
|
/// in a register through all relevant bundles.
|
2011-04-07 03:13:57 +08:00
|
|
|
bool finish();
|
2011-01-06 09:21:53 +08:00
|
|
|
|
2011-01-19 05:13:27 +08:00
|
|
|
/// getBlockFrequency - Return the estimated block execution frequency per
|
|
|
|
/// function invocation.
|
2013-07-17 02:26:18 +08:00
|
|
|
BlockFrequency getBlockFrequency(unsigned Number) const {
|
|
|
|
return BlockFrequencies[Number];
|
2011-03-04 08:58:40 +08:00
|
|
|
}
|
2011-01-19 05:13:27 +08:00
|
|
|
|
2011-01-06 09:21:53 +08:00
|
|
|
private:
|
2017-09-22 07:20:16 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &mf) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
2014-03-07 17:26:03 +08:00
|
|
|
void releaseMemory() override;
|
2011-01-06 09:21:53 +08:00
|
|
|
|
2017-09-22 07:20:16 +08:00
|
|
|
void activate(unsigned n);
|
2014-10-03 06:23:14 +08:00
|
|
|
void setThreshold(const BlockFrequency &Entry);
|
Reapply r263460: [SpillPlacement] Fix a quadratic behavior in spill placement.
Using Chandler's words from r265331:
This commit was greatly exacerbating PR17409 and effectively regressed
build time for lot of (very large) code when compiled with ASan or MSan.
PR17409 is fixed by r269249, so this is fine to reapply r263460.
Original commit message:
The bad behavior happens when we have a function with a long linear
chain of basic blocks, and have a live range spanning most of this
chain, but with very few uses.
Let say we have only 2 uses.
The Hopfield network is only seeded with two active blocks where the
uses are, and each iteration of the outer loop in
`RAGreedy::growRegion()` only adds two new nodes to the network due to
the completely linear shape of the CFG. Meanwhile,
`SpillPlacer->iterate()` visits the whole set of discovered nodes, which
adds up to a quadratic algorithm.
This is an historical accident effect from r129188.
When the Hopfield network is expanding, most of the action is happening
on the frontier where new nodes are being added. The internal nodes in
the network are not likely to be flip-flopping much, or they will at
least settle down very quickly. This means that while
`SpillPlacer->iterate()` is recomputing all the nodes in the network, it
is probably only the two frontier nodes that are changing their output.
Instead of recomputing the whole network on each iteration, we can
maintain a SparseSet of nodes that need to be updated:
- `SpillPlacement::activate()` adds the node to the todo list.
- When a node changes value (i.e., `update()` returns true), its
neighbors are added to the todo list.
- `SpillPlacement::iterate()` only updates the nodes in the list.
The result of Hopfield iterations is not necessarily exact. It should
converge to a local minimum, but there is no guarantee that it will find
a global minimum. It is possible that updating nodes in a different
order will cause us to switch to a different local minimum. In other
words, this is not NFC, but although I saw a few runtime improvements
and regressions when I benchmarked this change, those were side effects
and actually the performance change is in the noise as expected.
Huge thanks to Jakob Stoklund Olesen <stoklund@2pi.dk> for his
feedbacks, guidance and time for the review.
llvm-svn: 270149
2016-05-20 06:40:37 +08:00
|
|
|
|
2017-09-22 07:20:16 +08:00
|
|
|
bool update(unsigned n);
|
2011-01-06 09:21:53 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // end namespace llvm
|
|
|
|
|
2017-09-22 07:20:16 +08:00
|
|
|
#endif // LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
|