CFG editing functions

Summary:
This diff adds a number of methods to BinaryFunction that can be used to edit the CFG after it is created.

The basic public functions are:
  - createBasicBlock - create a new block that is not inserted into the CFG.
  - insertBasicBlocks - insert a range of blocks (made with createBasicBlock) into the CFG.
  - updateLayout - update the CFG layout (either by inserting new blocks at a certain point or recomputing the entire layout).
  - fixFallthroughBranch - add a direct jump to the fallthrough successor for a given block.

There are a number of private helper functions used to implement the above.

This was split off from the indirect call promotion (ICP) diff to simplify that diff a bit.

(cherry picked from FBD3611313)
This commit is contained in:
Bill Nell 2016-07-23 12:50:34 -07:00 committed by Maksim Panchenko
parent ab599fe71a
commit 50e011f4e5
3 changed files with 298 additions and 30 deletions

View File

@ -39,7 +39,13 @@ class BinaryContext;
/// The intention is to keep the structure similar to MachineBasicBlock as
/// we might switch to it at some point.
class BinaryBasicBlock {
public:
/// Profile counters describing one outgoing branch of a basic block.
struct BinaryBranchInfo {
  /// Count recorded for the branch (presumably the number of times it was
  /// taken -- confirm against the profile reader).
  uint64_t Count;
  /// Number of branches mispredicted.
  uint64_t MispredictedCount;
};
private:
/// Label associated with the block.
MCSymbol *Label{nullptr};
@ -83,11 +89,6 @@ class BinaryBasicBlock {
std::set<BinaryBasicBlock *> Throwers;
std::set<BinaryBasicBlock *> LandingPads;
/// Profile counters describing one outgoing branch of a basic block.
struct BinaryBranchInfo {
  /// Count recorded for the branch (presumably the number of times it was
  /// taken -- confirm against the profile reader).
  uint64_t Count;
  /// Number of branches mispredicted.
  uint64_t MispredictedCount;
};
/// Each successor has a corresponding BranchInfo entry in the list.
std::vector<BinaryBranchInfo> BranchInfo;
@ -301,10 +302,23 @@ public:
}
/// Add instruction at the end of this basic block.
void addInstruction(MCInst &Inst) {
/// Add instruction at the end of this basic block, consuming the temporary
/// \p Inst.
void addInstruction(MCInst &&Inst) {
  // Inside the function a named rvalue reference is an lvalue, so it has to
  // be cast back with std::move; otherwise the instruction is copied.
  Instructions.emplace_back(std::move(Inst));
}
/// Append a copy of \p Inst to the end of this basic block.
void addInstruction(const MCInst &Inst) {
  Instructions.emplace_back(Inst);
}
/// Append every instruction in the range [\p Begin, \p End) to the end of
/// this basic block, in iteration order.
template <typename Itr>
void addInstructions(Itr Begin, Itr End) {
  for (; Begin != End; ++Begin)
    addInstruction(*Begin);
}
/// Add instruction before Pos in this basic block.
const_iterator insertPseudoInstr(const_iterator Pos, MCInst &Instr) {
++NumPseudos;
@ -335,6 +349,24 @@ public:
uint64_t Count = 0,
uint64_t MispredictedCount = 0);
/// Register every block in the range [\p Begin, \p End) as a successor of
/// this block by calling addSuccessor() on each element in order.
template <typename Itr>
void addSuccessors(Itr Begin, Itr End) {
  for (; Begin != End; ++Begin)
    addSuccessor(*Begin);
}
/// Register every block in [\p Begin, \p End) as a successor, pairing the
/// N-th successor with the N-th branch info entry in [\p BrBegin, \p BrEnd).
/// Both ranges must have the same length.
template <typename Itr, typename BrItr>
void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) {
  assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd));
  for (; Begin != End; ++Begin, ++BrBegin) {
    const auto EdgeInfo = *BrBegin;
    addSuccessor(*Begin, EdgeInfo.Count, EdgeInfo.MispredictedCount);
  }
}
/// Adds block to landing pad list.
void addLandingPad(BinaryBasicBlock *LPBlock);
@ -342,6 +374,14 @@ public:
/// list of predecessors of \p Succ and update branch info.
void removeSuccessor(BinaryBasicBlock *Succ);
/// Call removeSuccessor() on every block in the range [\p Begin, \p End).
/// NOTE(review): if the range iterates over this block's own successor
/// list, each removal may invalidate the iterators -- confirm that callers
/// pass a separate container.
template <typename Itr>
void removeSuccessors(Itr Begin, Itr End) {
  while (Begin != End) {
    // Step past the element before removing it, as post-increment would.
    auto Current = Begin;
    ++Begin;
    removeSuccessor(*Current);
  }
}
/// Return the information about the number of times this basic block was
/// executed.
///
@ -350,6 +390,11 @@ public:
return ExecutionCount;
}
/// Set the execution count for this block.
void setExecutionCount(uint64_t Count) {
ExecutionCount = Count;
}
/// Return the current value of this block's IsCold flag.
bool isCold() const {
  const bool Cold = IsCold;
  return Cold;
}
@ -385,6 +430,21 @@ public:
return false;
}
/// Split apart the instructions in this basic block starting at Inst.
/// The instructions following Inst are removed and returned in a vector.
std::vector<MCInst> splitInstructions(const MCInst *Inst) {
std::vector<MCInst> SplitInst;
assert(!Instructions.empty());
while(&Instructions.back() != Inst) {
SplitInst.push_back(Instructions.back());
Instructions.pop_back();
}
std::reverse(SplitInst.begin(), SplitInst.end());
return SplitInst;
}
/// Sets the symbol pointing to the end of the BB in the output binary.
void setEndLabel(MCSymbol *Symbol) {
EndLabel = Symbol;
@ -436,6 +496,11 @@ private:
/// Remove predecessor of the basic block. Don't use directly, instead
/// use removeSuccessor() function.
void removePredecessor(BinaryBasicBlock *Pred);
/// Set offset of the basic block from the function start.
void setOffset(uint64_t NewOffset) {
Offset = NewOffset;
}
};
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);

View File

@ -99,6 +99,8 @@ BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
if (BasicBlocks.empty())
return nullptr;
// This is commented out because it makes BOLT too slow.
// assert(std::is_sorted(begin(), end()));
auto I = std::upper_bound(begin(),
end(),
BinaryBasicBlock(Offset));
@ -531,8 +533,61 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
return true;
}
bool BinaryFunction::buildCFG() {
/// Drop the landing pad links of every block with an index in
/// [\p StartIndex, \p StartIndex + \p NumBlocks). Each such block is removed
/// from the Throwers set of all of its landing pads, and its own LandingPads
/// set is then emptied.
void BinaryFunction::clearLandingPads(const unsigned StartIndex,
                                      const unsigned NumBlocks) {
  assert(StartIndex + NumBlocks <= BasicBlocks.size() &&
         "block range out of bounds");
  for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
    auto *BB = BasicBlocks[I];
    for (auto *LPBlock : BB->LandingPads) {
      const auto NumErased = LPBlock->Throwers.erase(BB);
      assert(NumErased == 1 && "block missing from its landing pad's throwers");
      // Only read by the assert above; silence unused-variable warnings in
      // builds where asserts are compiled out.
      (void)NumErased;
    }
    BB->LandingPads.clear();
  }
}
/// For every block with an index in
/// [\p StartIndex, \p StartIndex + \p NumBlocks) whose label is a known
/// landing pad, attach that block as a landing pad to each throwing block
/// recorded for the label in LPToBBIndex.
///
/// A landing pad block without an LPToBBIndex entry is skipped. This
/// happens when LPToBBIndex was built from a subset of the blocks and none
/// of them throws to that landing pad.
void BinaryFunction::addLandingPads(const unsigned StartIndex,
                                    const unsigned NumBlocks) {
  for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
    auto *BB = BasicBlocks[I];
    MCSymbol *LP = BB->getLabel();
    if (LandingPads.find(LP) == LandingPads.end())
      continue;

    // Use find() rather than at(): at() fails outright on a missing key.
    const auto ThrowersIt = LPToBBIndex.find(LP);
    if (ThrowersIt == LPToBBIndex.end())
      continue;

    for (unsigned ThrowerIndex : ThrowersIt->second) {
      assert(ThrowerIndex < BasicBlocks.size());
      BinaryBasicBlock *ThrowBB = BasicBlocks[ThrowerIndex];
      ThrowBB->addLandingPad(BB);
    }
  }
}
/// Rebuild the landing pad links for the blocks with an index in
/// [\p StartIndex, \p StartIndex + \p NumBlocks).
///
/// The existing links of those blocks are cleared, LPToBBIndex is filled
/// from the invoke instructions found in the same blocks, the links are
/// re-added from that map, and the map is released again. LPToBBIndex must
/// be empty on entry.
void BinaryFunction::recomputeLandingPads(const unsigned StartIndex,
                                          const unsigned NumBlocks) {
  assert(LPToBBIndex.empty());

  clearLandingPads(StartIndex, NumBlocks);

  const auto EndIndex = StartIndex + NumBlocks;
  for (auto BlockIdx = StartIndex; BlockIdx < EndIndex; ++BlockIdx) {
    auto *Block = BasicBlocks[BlockIdx];
    for (auto &Inst : Block->Instructions) {
      // Only invokes carry exception handling info.
      if (!BC.MIA->isInvoke(Inst))
        continue;

      const MCSymbol *LP;
      uint64_t Action;
      std::tie(LP, Action) = BC.MIA->getEHInfo(Inst);
      // Remember which block throws to this landing pad.
      if (LP)
        LPToBBIndex[LP].push_back(Block->Index);
    }
  }

  addLandingPads(StartIndex, NumBlocks);

  clearList(LPToBBIndex);
}
bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
@ -764,15 +819,7 @@ bool BinaryFunction::buildCFG() {
}
// Add associated landing pad blocks to each basic block.
for (auto BB : BasicBlocks) {
if (LandingPads.find(BB->getLabel()) != LandingPads.end()) {
MCSymbol *LP = BB->getLabel();
for (unsigned I : LPToBBIndex.at(LP)) {
BinaryBasicBlock *ThrowBB = getBasicBlockAtIndex(I);
ThrowBB->addLandingPad(BB);
}
}
}
addLandingPads(0, BasicBlocks.size());
// Infer frequency for non-taken branches
if (hasValidProfile())
@ -1066,6 +1113,7 @@ bool BinaryFunction::fixCFIState() {
std::vector<uint32_t> NewCFIs;
uint32_t NestedLevel = 0;
for (uint32_t CurState = FromState; CurState < ToState; ++CurState) {
assert(CurState < FrameInstructions.size());
MCCFIInstruction *Instr = &FrameInstructions[CurState];
if (Instr->getOperation() == MCCFIInstruction::OpRememberState)
++NestedLevel;
@ -1311,6 +1359,8 @@ void BinaryFunction::dumpGraphToFile(std::string Filename) const {
const BinaryBasicBlock *
BinaryFunction::getOriginalLayoutSuccessor(const BinaryBasicBlock *BB) const {
// This is commented out because it makes BOLT run too slowly.
//assert(std::is_sorted(begin(), end()));
auto I = std::upper_bound(begin(), end(), *BB);
assert(I != begin() && "first basic block not at offset 0");
@ -1343,9 +1393,7 @@ void BinaryFunction::fixBranches() {
HotColdBorder = true;
}
const BinaryBasicBlock *OldFTBB = getOriginalLayoutSuccessor(BB);
const MCSymbol *OldFT = nullptr;
if (OldFTBB != nullptr)
OldFT = OldFTBB->getLabel();
const MCSymbol *OldFT = OldFTBB ? OldFTBB->getLabel() : nullptr;
// Case 1: There are no branches in this basic block and it just falls
// through
@ -1431,6 +1479,49 @@ void BinaryFunction::fixBranches() {
}
}
/// Append an unconditional branch to the fallthrough successor of \p Block
/// when the block does not already end in one.
///
/// Nothing is done for blocks without successors or for blocks that already
/// have an unconditional branch. If the block has a conditional branch, the
/// fallthrough is the first successor whose label differs from the branch
/// target; otherwise it is the first successor.
void BinaryFunction::fixFallthroughBranch(BinaryBasicBlock *Block) {
  // No successors, must be a return or similar.
  if (Block->succ_size() == 0)
    return;

  const MCSymbol *TBB = nullptr;
  const MCSymbol *FBB = nullptr;
  MCInst *CondBranch = nullptr;
  MCInst *UncondBranch = nullptr;
  const bool Analyzed = BC.MIA->analyzeBranch(Block->Instructions, TBB, FBB,
                                              CondBranch, UncondBranch);
  if (!Analyzed) {
    assert(0);
    return;
  }

  // An explicit unconditional branch is already there; nothing to add.
  if (UncondBranch)
    return;

  const BinaryBasicBlock *FallThroughBB = nullptr;
  if (!CondBranch) {
    // pick first successor as fallthrough.
    FallThroughBB = *Block->succ_begin();
  } else {
    assert(TBB);
    // Find the first successor that is not a target of the conditional
    // branch.
    // NOTE(review): if every successor carries the TBB label, no
    // fallthrough is found and the assert below fires -- confirm that this
    // cannot happen.
    for (auto *Succ : Block->successors()) {
      if (Succ->getLabel() == TBB)
        continue;
      FallThroughBB = Succ;
      break;
    }
  }
  assert(FallThroughBB);

  MCInst NewInst;
  const bool Created = BC.MIA->createUncondBranch(
      NewInst, FallThroughBB->getLabel(), BC.Ctx.get());
  if (!Created) {
    llvm_unreachable("Target does not support creating new branches");
  }
  Block->addInstruction(NewInst);
}
void BinaryFunction::splitFunction() {
bool AllCold = true;
for (BinaryBasicBlock *BB : BasicBlocksLayout) {
@ -1916,6 +2007,60 @@ std::size_t BinaryFunction::hash() const {
return std::hash<std::string>{}(Opcodes);
}
/// Insert the blocks owned by \p NewBBs into BasicBlocks immediately after
/// \p Start and take ownership of them.
///
/// Afterwards the offset and index of \p Start and of every block behind it
/// are recomputed, the CFI state is re-annotated for the whole function,
/// and landing pads are recomputed for \p Start plus the new blocks.
void BinaryFunction::insertBasicBlocks(
    BinaryBasicBlock *Start,
    std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs) {
  const auto StartIndex = getIndex(Start);
  const auto NumNewBlocks = NewBBs.size();

  // Open a gap of null slots right after Start, then fill it with the
  // released raw pointers.
  BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1,
                     NumNewBlocks,
                     nullptr);
  auto Slot = StartIndex + 1;
  for (auto &NewBB : NewBBs) {
    assert(!BasicBlocks[Slot]);
    BasicBlocks[Slot] = NewBB.release();
    ++Slot;
  }

  // Recompute indices and offsets for Start and every block after it.
  uint64_t RunningOffset = Start->getOffset();
  for (auto Idx = StartIndex; Idx < BasicBlocks.size(); ++Idx) {
    auto *Block = BasicBlocks[Idx];
    Block->setOffset(RunningOffset);
    RunningOffset += BC.computeCodeSize(Block->begin(), Block->end());
    Block->Index = Idx;
  }

  // Recompute CFI state for all BBs.
  BBCFIState.clear();
  annotateCFIState();

  // Start itself is included, hence the +1.
  recomputeLandingPads(StartIndex, NumNewBlocks + 1);

  // Make sure the basic blocks are sorted properly.
  assert(std::is_sorted(begin(), end()));
}
// TODO: Which of these methods is better?
/// Insert the \p NumNewBlocks blocks stored in BasicBlocks at indices
/// [Start->Index + 1, Start->Index + NumNewBlocks + 1) into the layout
/// immediately after \p Start. \p Start must already be in the layout and
/// the new blocks must already be in BasicBlocks.
void BinaryFunction::updateLayout(BinaryBasicBlock* Start,
                                  const unsigned NumNewBlocks) {
  // Insert new blocks in the layout immediately after Start.
  auto Pos = std::find(layout_begin(), layout_end(), Start);
  assert(Pos != layout_end());
  assert(Start->Index + NumNewBlocks < BasicBlocks.size() &&
         "new blocks must already be part of BasicBlocks");

  // Use iterator arithmetic instead of &BasicBlocks[...]: when the new
  // blocks are the last ones, the end of the range is one past the last
  // element, which must not be reached through operator[].
  const auto Begin = BasicBlocks.begin() + (Start->Index + 1);
  const auto End = Begin + NumNewBlocks;
  BasicBlocksLayout.insert(Pos + 1, Begin, End);
}
/// Recompute the layout from scratch: reset it to the order of BasicBlocks
/// and then run modifyLayout() with the given parameters.
void BinaryFunction::updateLayout(LayoutType Type,
                                  bool MinBranchClusters,
                                  bool Split) {
  // Start again from the BasicBlocks order before modifyLayout() runs.
  BasicBlocksLayout.assign(BasicBlocks.begin(), BasicBlocks.end());
  modifyLayout(Type, MinBranchClusters, Split);
}
BinaryFunction::~BinaryFunction() {
for (auto BB : BasicBlocks) {
delete BB;

View File

@ -232,6 +232,20 @@ private:
const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
bool AreInvokes) const;
/// Clear the landing pads for all blocks contained in the range of
/// [StartIndex, StartIndex + NumBlocks). This also has the effect of
/// removing throws that point to any of these blocks.
void clearLandingPads(const unsigned StartIndex, const unsigned NumBlocks);
/// Add landing pads for all blocks in the range
/// [StartIndex, StartIndex + NumBlocks) using LPToBBIndex.
void addLandingPads(const unsigned StartIndex, const unsigned NumBlocks);
/// Recompute the landing pad information for all the basic blocks in the
/// range of [StartIndex to StartIndex + NumBlocks).
void recomputeLandingPads(const unsigned StartIndex,
const unsigned NumBlocks);
/// Return basic block that originally was laid out immediately following
/// the given \p BB basic block.
const BinaryBasicBlock *
@ -593,6 +607,32 @@ public:
Names.emplace_back(NewName);
}
/// Create a basic block at a given \p Offset in the
/// function.
/// If \p DeriveAlignment is true, set the alignment of the block based
/// on the alignment of the existing offset.
/// The new block is not inserted into the CFG. The client must
/// use insertBasicBlocks to add any new blocks to the CFG.
///
std::unique_ptr<BinaryBasicBlock>
createBasicBlock(uint64_t Offset,
MCSymbol *Label = nullptr,
bool DeriveAlignment = false) {
assert(BC.Ctx && "cannot be called with empty context");
if (!Label) {
Label = BC.Ctx->createTempSymbol("BB", true);
}
auto BB = std::unique_ptr<BinaryBasicBlock>(
new BinaryBasicBlock(Label, this, Offset));
if (DeriveAlignment) {
uint64_t DerivedAlignment = Offset & (1 + ~Offset);
BB->setAlignment(std::min(DerivedAlignment, uint64_t(32)));
}
return BB;
}
/// Create a basic block at a given \p Offset in the
/// function and append it to the end of list of blocks.
/// If \p DeriveAlignment is true, set the alignment of the block based
@ -601,21 +641,16 @@ public:
/// Returns NULL if basic block already exists at the \p Offset.
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label,
bool DeriveAlignment = false) {
assert(!getBasicBlockAtOffset(Offset) && "basic block already exists");
assert(BC.Ctx && "cannot be called with empty context");
if (!Label)
Label = BC.Ctx->createTempSymbol("BB", true);
BasicBlocks.emplace_back(new BinaryBasicBlock(Label, this, Offset));
assert(CurrentState == State::CFG ||
(!getBasicBlockAtOffset(Offset) && "basic block already exists"));
auto BBPtr = createBasicBlock(Offset, Label, DeriveAlignment);
BasicBlocks.emplace_back(BBPtr.release());
auto BB = BasicBlocks.back();
if (DeriveAlignment) {
uint64_t DerivedAlignment = Offset & (1 + ~Offset);
BB->setAlignment(std::min(DerivedAlignment, uint64_t(32)));
}
BB->Index = BasicBlocks.size() - 1;
assert(CurrentState == State::CFG || std::is_sorted(begin(), end()));
return BB;
}
@ -636,6 +671,24 @@ public:
/// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);
/// Insert the BBs contained in NewBBs into the basic blocks for this
/// function. Update the associated state of all blocks as needed, i.e.
/// CFI state, BB offsets, BB indices. The new BBs are inserted after
/// Start. This operation could affect fallthrough branches for Start.
///
void insertBasicBlocks(
BinaryBasicBlock *Start,
std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs);
/// Update the basic block layout for this function. The BBs at indices
/// [Start->Index + 1, Start->Index + NumNewBlocks + 1) are inserted into the
/// layout immediately after the BB indicated by Start.
void updateLayout(BinaryBasicBlock* Start, const unsigned NumNewBlocks);
/// Update the basic block layout for this function. The layout is
/// computed from scratch using modifyLayout.
void updateLayout(LayoutType Type, bool MinBranchClusters, bool Split);
/// Dump function information to debug output. If \p PrintInstructions
/// is true - include instruction disassembly.
void dump(std::string Annotation = "", bool PrintInstructions = true) const;
@ -902,6 +955,11 @@ public:
/// adding jumps based on a new layout order.
void fixBranches();
/// If needed, add an unconditional jmp to the original fallthrough of
/// Block. This is used by the indirect call promotion optimization
/// since it inserts new BBs after the merge block.
void fixFallthroughBranch(BinaryBasicBlock *Block);
/// Split function in two: a part with warm or hot BBs and a part with never
/// executed BBs. The cold part is moved to a new BinaryFunction.
void splitFunction();