Split FrameAnalysis and improve LivenessAnalysis

Summary:
Split FrameAnalysis into FrameAnalysis and RegAnalysis, since
some optimizations only require register information about functions,
not frame information. Refactor the call graph walking code into the
CallGraphWalker class, allowing any analysis that depends on the call
graph to traverse it easily via a visitor pattern. Also fix
LivenessAnalysis, which was broken because it did not consider
registers read by callees and incorporate that information into the
caller.

(cherry picked from FBD5177901)
Rafael Auler 2017-06-02 16:57:22 -07:00 committed by Maksim Panchenko
parent d850ca3622
commit 2c23094299
19 changed files with 584 additions and 317 deletions
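
At a glance, the LivenessAnalysis fix lands in the gen step of the backward
transfer function: for calls, registers read inside the callee must now be
added to the live set. A minimal sketch of the corrected transfer, using the
RegAnalysis interface introduced later in this diff (Cur is the live set
after Point; BC, RA and NumRegs as in LivenessAnalysis):

    BitVector Next = Cur;
    BitVector Written(NumRegs, false), Used(NumRegs, false);
    if (BC.MIA->isCall(Point))
      RA.getInstClobberList(Point, Written);  // kill: regs the callee writes
    else
      BC.MIA->getWrittenRegs(Point, Written, *BC.MRI);
    Written.flip();
    Next &= Written;                          // drop killed regs
    // gen: for calls, this now folds in regs read inside the callee
    RA.getInstUsedRegsList(Point, Used, /*GetClobbers=*/false);
    Next |= Used;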

bolt/Passes/BinaryFunctionCallGraph.cpp

@ -12,6 +12,7 @@
#include "BinaryFunctionCallGraph.h"
#include "BinaryFunction.h"
#include "BinaryContext.h"
#include "llvm/Support/Timer.h"
#define DEBUG_TYPE "callgraph"
@ -30,6 +31,7 @@ CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
}
std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
NamedRegionTimer T1("Build cg traversal order", "CG breakdown", true);
std::deque<BinaryFunction *> TopologicalOrder;
enum NodeStatus { NEW, VISITING, VISITED };
std::vector<NodeStatus> NodeStatus(Funcs.size());
@ -73,6 +75,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
bool IncludeColdCalls,
bool UseFunctionHotSize,
bool UseEdgeCounts) {
NamedRegionTimer T1("Callgraph construction", "CG breakdown", true);
BinaryFunctionCallGraph Cg;
// Add call graph nodes.

bolt/Passes/CMakeLists.txt

@ -3,6 +3,7 @@ add_llvm_library(LLVMBOLTPasses
BinaryPasses.cpp
BinaryFunctionCallGraph.cpp
CallGraph.cpp
CallGraphWalker.cpp
DataflowAnalysis.cpp
DataflowInfoManager.cpp
FrameAnalysis.cpp
@ -13,6 +14,7 @@ add_llvm_library(LLVMBOLTPasses
Inliner.cpp
LivenessAnalysis.cpp
PettisAndHansen.cpp
RegAnalysis.cpp
ReorderAlgorithm.cpp
ReorderFunctions.cpp
ShrinkWrapping.cpp

bolt/Passes/CallGraphWalker.cpp (new file)

@ -0,0 +1,46 @@
#include "CallGraphWalker.h"
#include "llvm/Support/Timer.h"
namespace llvm {
namespace bolt {
void CallGraphWalker::traverseCG() {
NamedRegionTimer T1("CG Traversal", "CG breakdown", true);
std::queue<BinaryFunction *> Queue;
std::set<BinaryFunction *> InQueue;
for (auto *Func : TopologicalCGOrder) {
Queue.push(Func);
InQueue.insert(Func);
}
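// Fixed-point iteration: whenever a visitor reports a change for Func,
// re-enqueue its callers so they are revisited with the updated info.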
while (!Queue.empty()) {
auto *Func = Queue.front();
Queue.pop();
InQueue.erase(Func);
bool Changed{false};
for (auto Visitor : Visitors) {
bool CurVisit = Visitor(Func);
Changed = Changed || CurVisit;
}
if (Changed) {
for (auto CallerID : CG.predecessors(CG.getNodeId(Func))) {
BinaryFunction *CallerFunc = CG.nodeIdToFunc(CallerID);
if (InQueue.count(CallerFunc))
continue;
Queue.push(CallerFunc);
InQueue.insert(CallerFunc);
}
}
}
}
void CallGraphWalker::walk() {
TopologicalCGOrder = CG.buildTraversalOrder();
traverseCG();
}
}
}

bolt/Passes/CallGraphWalker.h (new file)

@ -0,0 +1,67 @@
//===--- Passes/CallGraphWalker.h -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPHWALKER_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPHWALKER_H
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "BinaryFunctionCallGraph.h"
#include <queue>
#include <map>
#include <set>
#include <vector>
namespace llvm {
namespace bolt {
/// Perform a bottom-up walk of the call graph with the intent of computing
/// a property that depends on callees. In the event of CG cycles, this will
/// re-visit functions until their observed property converges.
class CallGraphWalker {
BinaryContext &BC;
std::map<uint64_t, BinaryFunction> &BFs;
BinaryFunctionCallGraph &CG;
/// DFS or reverse post-ordering of the call graph nodes to allow us to
/// traverse the call graph bottom-up
std::deque<BinaryFunction *> TopologicalCGOrder;
/// Stores all visitor functions to call when traversing the call graph
typedef std::function<bool(BinaryFunction*)> CallbackTy;
std::vector<CallbackTy> Visitors;
/// Do the bottom-up traversal
void traverseCG();
public:
/// Initialize core context references but don't do anything yet
CallGraphWalker(BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunctionCallGraph &CG)
: BC(BC), BFs(BFs), CG(CG) {}
/// Register a new callback function to be called for each function when
/// traversing the call graph bottom-up. The callback should return true iff
/// the information it tracks has changed. It must converge over time, i.e.,
/// eventually return false; otherwise, the call graph walk will never
/// finish.
void registerVisitor(CallbackTy Callback) {
Visitors.emplace_back(Callback);
}
/// Build the call graph, establish a traversal order and traverse it.
void walk();
};
}
}
#endif
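
A usage sketch (computeInfoFor is a hypothetical per-function analysis; the
pattern mirrors how RegAnalysis uses the walker later in this diff):

    CallGraphWalker Walker(BC, BFs, CG);
    std::map<const BinaryFunction *, BitVector> Info;
    Walker.registerVisitor([&](BinaryFunction *Func) -> bool {
      BitVector New = computeInfoFor(Func);  // hypothetical helper
      bool Changed = !Info.count(Func) || Info[Func] != New;
      if (Changed)
        Info[Func] = std::move(New);
      return Changed;  // true re-enqueues Func's callers
    });
    Walker.walk();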

bolt/Passes/DataflowInfoManager.cpp

@ -18,8 +18,8 @@ namespace bolt {
ReachingDefOrUse</*Def=*/true> &DataflowInfoManager::getReachingDefs() {
if (RD)
return *RD;
assert(FA && "FrameAnalysis required");
RD.reset(new ReachingDefOrUse<true>(*FA, BC, BF));
assert(RA && "RegAnalysis required");
RD.reset(new ReachingDefOrUse<true>(*RA, BC, BF));
RD->run();
return *RD;
}
@ -31,8 +31,8 @@ void DataflowInfoManager::invalidateReachingDefs() {
ReachingDefOrUse</*Def=*/false> &DataflowInfoManager::getReachingUses() {
if (RU)
return *RU;
assert(FA && "FrameAnalysis required");
RU.reset(new ReachingDefOrUse<false>(*FA, BC, BF));
assert(RA && "RegAnalysis required");
RU.reset(new ReachingDefOrUse<false>(*RA, BC, BF));
RU->run();
return *RU;
}
@ -44,8 +44,8 @@ void DataflowInfoManager::invalidateReachingUses() {
LivenessAnalysis &DataflowInfoManager::getLivenessAnalysis() {
if (LA)
return *LA;
assert(FA && "FrameAnalysis required");
LA.reset(new LivenessAnalysis(*FA, BC, BF));
assert(RA && "RegAnalysis required");
LA.reset(new LivenessAnalysis(*RA, BC, BF));
LA->run();
return *LA;
}

bolt/Passes/DataflowInfoManager.h

@ -12,14 +12,15 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_DATAFLOWINFOMANAGER_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_DATAFLOWINFOMANAGER_H
#include "FrameAnalysis.h"
#include "ReachingDefOrUse.h"
#include "StackReachingUses.h"
#include "DominatorAnalysis.h"
#include "StackPointerTracking.h"
#include "ReachingInsns.h"
#include "FrameAnalysis.h"
#include "LivenessAnalysis.h"
#include "ReachingDefOrUse.h"
#include "ReachingInsns.h"
#include "RegAnalysis.h"
#include "StackAllocationAnalysis.h"
#include "StackPointerTracking.h"
#include "StackReachingUses.h"
namespace llvm {
namespace bolt {
@ -29,6 +30,7 @@ namespace bolt {
/// recompute it. Also provide an interface for data invalidation when the
/// analysis is outdated after a transform pass modified the function.
class DataflowInfoManager {
const RegAnalysis *RA;
const FrameAnalysis *FA;
const BinaryContext &BC;
BinaryFunction &BF;
@ -46,8 +48,9 @@ class DataflowInfoManager {
InsnToBB;
public:
DataflowInfoManager(const FrameAnalysis *FA, const BinaryContext &BC,
BinaryFunction &BF) : FA(FA), BC(BC), BF(BF) {};
DataflowInfoManager(const BinaryContext &BC, BinaryFunction &BF,
const RegAnalysis *RA, const FrameAnalysis *FA)
: RA(RA), FA(FA), BC(BC), BF(BF){};
/// Helper function to fetch the parent BB associated with a program point
/// If PP is a BB itself, then return itself (cast to a BinaryBasicBlock)
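
A usage sketch of the new constructor order (as FrameOptimizer and
IndirectCallPromotion do below; either analysis pointer may be nullptr when a
client does not need it, matching the asserts in DataflowInfoManager.cpp
above):

    DataflowInfoManager Info(BC, BF, &RA, &FA);  // was: Info(&FA, BC, BF)
    auto &LA = Info.getLivenessAnalysis();       // computed on demand, cached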

bolt/Passes/FrameAnalysis.cpp

@ -9,6 +9,7 @@
//
//===----------------------------------------------------------------------===//
#include "FrameAnalysis.h"
#include "CallGraphWalker.h"
#include <fstream>
#define DEBUG_TYPE "fa"
@ -213,9 +214,8 @@ public:
} // end anonymous namespace
void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst,
ArgAccesses &&AA) {
if (auto OldAA = getArgAccessesFor(BC, Inst)) {
void FrameAnalysis::addArgAccessesFor(MCInst &Inst, ArgAccesses &&AA) {
if (auto OldAA = getArgAccessesFor(Inst)) {
if (OldAA->AssumeEverything)
return;
*OldAA = std::move(AA);
@ -231,13 +231,12 @@ void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst,
ArgAccessesVector.emplace_back(std::move(AA));
}
void FrameAnalysis::addArgInStackAccessFor(const BinaryContext &BC,
MCInst &Inst,
void FrameAnalysis::addArgInStackAccessFor(MCInst &Inst,
const ArgInStackAccess &Arg) {
auto AA = getArgAccessesFor(BC, Inst);
auto AA = getArgAccessesFor(Inst);
if (!AA) {
addArgAccessesFor(BC, Inst, ArgAccesses(false));
AA = getArgAccessesFor(BC, Inst);
addArgAccessesFor(Inst, ArgAccesses(false));
AA = getArgAccessesFor(Inst);
assert(AA && "Object setup failed");
}
auto &Set = AA->Set;
@ -245,15 +244,13 @@ void FrameAnalysis::addArgInStackAccessFor(const BinaryContext &BC,
Set.emplace(Arg);
}
void FrameAnalysis::addFIEFor(const BinaryContext &BC, MCInst &Inst,
const FrameIndexEntry &FIE) {
void FrameAnalysis::addFIEFor(MCInst &Inst, const FrameIndexEntry &FIE) {
BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "FrameAccessEntry",
(unsigned)FIEVector.size());
FIEVector.emplace_back(FIE);
}
ErrorOr<ArgAccesses &>
FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, const MCInst &Inst) {
ErrorOr<ArgAccesses &> FrameAnalysis::getArgAccessesFor(const MCInst &Inst) {
if (auto Idx = BC.MIA->tryGetAnnotationAs<unsigned>(Inst, "ArgAccessEntry")) {
assert(ArgAccessesVector.size() > *Idx && "Out of bounds");
return ArgAccessesVector[*Idx];
@ -262,8 +259,7 @@ FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, const MCInst &Inst) {
}
ErrorOr<const ArgAccesses &>
FrameAnalysis::getArgAccessesFor(const BinaryContext &BC,
const MCInst &Inst) const {
FrameAnalysis::getArgAccessesFor(const MCInst &Inst) const {
if (auto Idx = BC.MIA->tryGetAnnotationAs<unsigned>(Inst, "ArgAccessEntry")) {
assert(ArgAccessesVector.size() > *Idx && "Out of bounds");
return ArgAccessesVector[*Idx];
@ -272,7 +268,7 @@ FrameAnalysis::getArgAccessesFor(const BinaryContext &BC,
}
ErrorOr<const FrameIndexEntry &>
FrameAnalysis::getFIEFor(const BinaryContext &BC, const MCInst &Inst) const {
FrameAnalysis::getFIEFor(const MCInst &Inst) const {
if (auto Idx =
BC.MIA->tryGetAnnotationAs<unsigned>(Inst, "FrameAccessEntry")) {
assert(FIEVector.size() > *Idx && "Out of bounds");
@ -281,130 +277,17 @@ FrameAnalysis::getFIEFor(const BinaryContext &BC, const MCInst &Inst) const {
return make_error_code(errc::result_out_of_range);
}
void FrameAnalysis::getInstClobberList(const BinaryContext &BC,
const MCInst &Inst,
BitVector &KillSet) const {
if (!BC.MIA->isCall(Inst)) {
BC.MIA->getClobberedRegs(Inst, KillSet, *BC.MRI);
return;
}
void FrameAnalysis::traverseCG(BinaryFunctionCallGraph &CG) {
CallGraphWalker CGWalker(BC, BFs, CG);
const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
// If indirect call, kill set should have all elements
if (TargetSymbol == nullptr) {
KillSet.set(0, KillSet.size());
return;
}
CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool {
return computeArgsAccessed(*Func);
});
const auto *Function = BC.getFunctionForSymbol(TargetSymbol);
if (Function == nullptr) {
// Call to a function without a BinaryFunction object.
// This should be a call to a PLT entry, and since it is a trampoline to
// a DSO, we can't really know the code in advance. Conservatively assume
// everything is clobbered.
KillSet.set(0, KillSet.size());
return;
}
auto BV = RegsKilledMap.find(Function);
if (BV != RegsKilledMap.end()) {
KillSet |= BV->second;
return;
}
// Ignore calls to functions whose clobber list wasn't yet calculated. This
// instruction will be evaluated again once we have info for the callee.
return;
CGWalker.walk();
}
BitVector FrameAnalysis::getFunctionClobberList(const BinaryContext &BC,
const BinaryFunction *Func) {
BitVector RegsKilled = BitVector(BC.MRI->getNumRegs(), false);
if (!Func->isSimple() || !Func->hasCFG()) {
RegsKilled.set(0, RegsKilled.size());
return RegsKilled;
}
for (const auto &BB : *Func) {
for (const auto &Inst : BB) {
getInstClobberList(BC, Inst, RegsKilled);
}
}
return RegsKilled;
}
void FrameAnalysis::buildClobberMap(const BinaryContext &BC) {
std::queue<BinaryFunction *> Queue;
std::set<BinaryFunction *> InQueue;
for (auto *Func : TopologicalCGOrder) {
Queue.push(Func);
InQueue.insert(Func);
}
while (!Queue.empty()) {
auto *Func = Queue.front();
Queue.pop();
InQueue.erase(Func);
BitVector RegsKilled = getFunctionClobberList(BC, Func);
bool ArgsUpdated = ClobberAnalysisOnly ? false : computeArgsAccessed(BC, *Func);
bool RegsUpdated = false;
if (RegsKilledMap.find(Func) == RegsKilledMap.end()) {
RegsKilledMap[Func] = std::move(RegsKilled);
} else {
RegsUpdated = RegsKilledMap[Func] != RegsKilled;
if (RegsUpdated)
RegsKilledMap[Func] = std::move(RegsKilled);
}
if (RegsUpdated || ArgsUpdated) {
for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) {
BinaryFunction *CallerFunc = Cg.nodeIdToFunc(Caller);
if (!InQueue.count(CallerFunc)) {
InQueue.insert(CallerFunc);
Queue.push(CallerFunc);
}
}
}
}
if (opts::Verbosity == 0) {
#ifndef NDEBUG
if (!DebugFlag || !isCurrentDebugType("fa"))
return;
#else
return;
#endif
}
// This loop is for computing statistics only
for (auto *Func : TopologicalCGOrder) {
auto Iter = RegsKilledMap.find(Func);
assert(Iter != RegsKilledMap.end() &&
"Failed to compute all clobbers list");
if (Iter->second.all()) {
auto Count = Func->getExecutionCount();
if (Count != BinaryFunction::COUNT_NO_PROFILE)
CountFunctionsAllClobber += Count;
++NumFunctionsAllClobber;
}
DEBUG_WITH_TYPE("fa",
dbgs() << "Killed regs set for func: " << Func->getPrintName() << "\n";
const BitVector &RegsKilled = Iter->second;
int RegIdx = RegsKilled.find_first();
while (RegIdx != -1) {
dbgs() << "\tREG" << RegIdx;
RegIdx = RegsKilled.find_next(RegIdx);
};
dbgs() << "\n";
);
}
}
bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC,
const BinaryFunction &BF, MCInst &Inst,
bool FrameAnalysis::updateArgsTouchedFor(const BinaryFunction &BF, MCInst &Inst,
int CurOffset) {
if (!BC.MIA->isCall(Inst))
return false;
@ -413,7 +296,7 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC,
const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
// If indirect call, we conservatively assume it accesses all stack positions
if (TargetSymbol == nullptr) {
addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true));
addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true));
bool Updated{false};
if (!FunctionsRequireAlignment.count(&BF)) {
Updated = true;
@ -426,7 +309,7 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC,
// Call to a function without a BinaryFunction object. Conservatively assume
// it accesses all stack positions
if (Function == nullptr) {
addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true));
addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true));
bool Updated{false};
if (!FunctionsRequireAlignment.count(&BF)) {
Updated = true;
@ -459,27 +342,25 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC,
if (CurOffset == StackPointerTracking::EMPTY ||
CurOffset == StackPointerTracking::SUPERPOSITION) {
addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true));
addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true));
return Changed;
}
for (auto Elem : Iter->second) {
if (Elem.first == -1) {
addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true));
addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true));
break;
}
DEBUG(dbgs() << "Added arg in stack access annotation "
<< CurOffset + Elem.first << "\n");
addArgInStackAccessFor(
BC, Inst,
ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first,
/*Size=*/Elem.second});
Inst, ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first,
/*Size=*/Elem.second});
}
return Changed;
}
bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC,
BinaryFunction &BF) {
bool FrameAnalysis::computeArgsAccessed(BinaryFunction &BF) {
if (!BF.isSimple() || !BF.hasCFG()) {
DEBUG(dbgs() << "Treating " << BF.getPrintName() << " conservatively.\n");
bool Updated = false;
@ -505,7 +386,7 @@ bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC,
// Check for calls -- attach stack accessing info to them regarding their
// target
if (updateArgsTouchedFor(BC, BF, Inst, FAA.getSPOffset()))
if (updateArgsTouchedFor(BF, Inst, FAA.getSPOffset()))
UpdatedArgsTouched = true;
// Check for stack accesses that affect callers
@ -548,8 +429,7 @@ bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC,
return UpdatedArgsTouched || UpdatedAlignedStatus;
}
bool FrameAnalysis::restoreFrameIndex(const BinaryContext &BC,
BinaryFunction &BF) {
bool FrameAnalysis::restoreFrameIndex(BinaryFunction &BF) {
FrameAccessAnalysis FAA(BC, BF);
DEBUG(dbgs() << "Restoring frame indices for \"" << BF.getPrintName()
@ -572,7 +452,7 @@ bool FrameAnalysis::restoreFrameIndex(const BinaryContext &BC,
const FrameIndexEntry &FIE = FAA.getFIE();
addFIEFor(BC, Inst, FIE);
addFIEFor(Inst, FIE);
DEBUG({
dbgs() << "Frame index annotation " << FIE << " added to:\n";
BC.printInstruction(dbgs(), Inst, 0, &BF, true);
@ -582,8 +462,7 @@ bool FrameAnalysis::restoreFrameIndex(const BinaryContext &BC,
return true;
}
void FrameAnalysis::cleanAnnotations(const BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs) {
void FrameAnalysis::cleanAnnotations() {
for (auto &I : BFs) {
for (auto &BB : I.second) {
for (auto &Inst : BB) {
@ -594,24 +473,15 @@ void FrameAnalysis::cleanAnnotations(const BinaryContext &BC,
}
}
void FrameAnalysis::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &) {
{
NamedRegionTimer T1("Callgraph construction", "FOP breakdown", true);
Cg = buildCallGraph(BC, BFs);
}
{
NamedRegionTimer T1("build cg traversal order", "FOP breakdown", true);
TopologicalCGOrder = Cg.buildTraversalOrder();
}
{
NamedRegionTimer T1("build clobber map", "FOP breakdown", true);
buildClobberMap(BC);
}
FrameAnalysis::FrameAnalysis(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunctionCallGraph &CG)
: BC(BC), BFs(BFs) {
// Position 0 of the vector should always be associated with "assume access
// everything".
ArgAccessesVector.emplace_back(ArgAccesses(/*AssumeEverything*/ true));
if (ClobberAnalysisOnly)
return;
traverseCG(CG);
for (auto &I : BFs) {
auto Count = I.second.getExecutionCount();
@ -630,7 +500,7 @@ void FrameAnalysis::runOnFunctions(BinaryContext &BC,
{
NamedRegionTimer T1("restore frame index", "FOP breakdown", true);
if (!restoreFrameIndex(BC, I.second)) {
if (!restoreFrameIndex(I.second)) {
++NumFunctionsFailedRestoreFI;
auto Count = I.second.getExecutionCount();
if (Count != BinaryFunction::COUNT_NO_PROFILE)
@ -643,12 +513,7 @@ void FrameAnalysis::runOnFunctions(BinaryContext &BC,
}
void FrameAnalysis::printStats() {
outs() << "BOLT-INFO FRAME ANALYSIS: Number of functions conservatively "
"treated as clobbering all registers: "
<< NumFunctionsAllClobber
<< format(" (%.1lf%% dyn cov)\n",
(100.0 * CountFunctionsAllClobber / CountDenominator))
<< "BOLT-INFO FRAME ANALYSIS: " << NumFunctionsNotOptimized
outs() << "BOLT-INFO FRAME ANALYSIS: " << NumFunctionsNotOptimized
<< " function(s) "
<< format("(%.1lf%% dyn cov)",
(100.0 * CountFunctionsNotOptimized / CountDenominator))

bolt/Passes/FrameAnalysis.h

@ -12,8 +12,8 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEANALYSIS_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEANALYSIS_H
#include "BinaryPasses.h"
#include "BinaryFunctionCallGraph.h"
#include "BinaryPasses.h"
#include "StackPointerTracking.h"
namespace llvm {
@ -111,17 +111,9 @@ raw_ostream &operator<<(raw_ostream &OS,
/// ... callee may access any position of our current stack frame
/// }
///
class FrameAnalysis : public BinaryFunctionPass {
/// Call graph info
BinaryFunctionCallGraph Cg;
/// DFS or reverse post-ordering of the call graph nodes to allow us to
/// traverse the call graph bottom-up
std::deque<BinaryFunction *> TopologicalCGOrder;
/// Map functions to the set of registers they may overwrite starting at when
/// it is called until it returns to the caller.
std::map<const BinaryFunction *, BitVector> RegsKilledMap;
class FrameAnalysis {
BinaryContext &BC;
std::map<uint64_t, BinaryFunction> &BFs;
/// Map functions to the set of <stack offsets, size> tuples representing
/// accesses to stack positions that belong to the caller
@ -142,70 +134,44 @@ class FrameAnalysis : public BinaryFunctionPass {
std::vector<FrameIndexEntry> FIEVector;
/// Analysis stats counters
uint64_t NumFunctionsAllClobber{0};
uint64_t CountFunctionsAllClobber{0};
uint64_t NumFunctionsNotOptimized{0};
uint64_t NumFunctionsFailedRestoreFI{0};
uint64_t CountFunctionsNotOptimized{0};
uint64_t CountFunctionsFailedRestoreFI{0};
uint64_t CountDenominator{0};
/// If this flag is set to true, the analysis will never run completely,
/// but will stop after callgraph and a clobber analysis for every function
/// has been computed.
bool ClobberAnalysisOnly{false};
/// Convenience functions for appending MCAnnotations to instructions with
/// our specific data
void addArgAccessesFor(const BinaryContext &BC, MCInst &Inst,
ArgAccesses &&AA);
void addArgInStackAccessFor(const BinaryContext &BC, MCInst &Inst,
const ArgInStackAccess &Arg);
void addFIEFor(const BinaryContext &BC, MCInst &Inst,
const FrameIndexEntry &FIE);
/// Compute the set of registers \p Func may write to during its execution,
/// starting at the point when it is called up until when it returns. Returns
/// a BitVector the size of the target number of registers, representing the
/// set of clobbered registers.
BitVector getFunctionClobberList(const BinaryContext &BC,
const BinaryFunction *Func);
void addArgAccessesFor(MCInst &Inst, ArgAccesses &&AA);
void addArgInStackAccessFor(MCInst &Inst, const ArgInStackAccess &Arg);
void addFIEFor(MCInst &Inst, const FrameIndexEntry &FIE);
/// Perform the step of building the set of registers clobbered by each
/// function execution, populating RegsKilledMap.
void buildClobberMap(const BinaryContext &BC);
/// function execution, populating RegsKilledMap and RegsGenMap.
void traverseCG(BinaryFunctionCallGraph &CG);
/// Analyzes an instruction and if it is a call, checks the called function
/// to record which args in stack are accessed, if any. Returns true if
/// the args data associated with this instruction were updated.
bool updateArgsTouchedFor(const BinaryContext &BC, const BinaryFunction &BF,
MCInst &Inst, int CurOffset);
bool updateArgsTouchedFor(const BinaryFunction &BF, MCInst &Inst,
int CurOffset);
/// Performs a pass over \p BF to check for accesses to arguments in stack,
/// flagging those as accessing the caller stack frame. All functions called
/// by \p BF must have been previously analyzed. Returns true if updated
/// args data about this function.
bool computeArgsAccessed(const BinaryContext &BC, BinaryFunction &BF);
bool computeArgsAccessed(BinaryFunction &BF);
/// Alias analysis to disambiguate which frame position is accessed by each
/// instruction in function \p BF. Add MCAnnotation<FrameIndexEntry> to
/// instructions that access a frame position. Return false if it failed
/// to analyze and this information can't be safely determined for \p BF.
bool restoreFrameIndex(const BinaryContext &BC, BinaryFunction &BF);
bool restoreFrameIndex(BinaryFunction &BF);
public:
explicit FrameAnalysis(const cl::opt<bool> &PrintPass,
bool ClobberAnalysisOnly=false)
: BinaryFunctionPass(PrintPass),
ClobberAnalysisOnly(ClobberAnalysisOnly) {
// Position 0 of the vector should be always associated with "assume access
// everything".
ArgAccessesVector.emplace_back(ArgAccesses(/*AssumeEverything*/ true));
}
const char *getName() const override {
return "frame-analysis";
}
explicit FrameAnalysis(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunctionCallGraph &CG);
/// Return true if we could fully analyze \p Func
bool hasFrameInfo(const BinaryFunction &Func) const {
@ -217,30 +183,19 @@ public:
return FunctionsRequireAlignment.count(&Func);
}
/// Compute the set of registers \p Inst may write to, marking them in
/// \p KillSet. If this is a call, try to get the set of registers the call
/// target will write to.
void getInstClobberList(const BinaryContext &BC, const MCInst &Inst,
BitVector &KillSet) const;
/// Functions for retrieving our specific MCAnnotation data from instructions
ErrorOr<ArgAccesses &> getArgAccessesFor(const BinaryContext &BC,
const MCInst &Inst);
ErrorOr<ArgAccesses &> getArgAccessesFor(const MCInst &Inst);
ErrorOr<const ArgAccesses &> getArgAccessesFor(const BinaryContext &BC,
const MCInst &Inst) const;
ErrorOr<const ArgAccesses &> getArgAccessesFor(const MCInst &Inst) const;
ErrorOr<const FrameIndexEntry &> getFIEFor(const BinaryContext &BC,
const MCInst &Inst) const;
/// Pass entry point
void runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) override;
ErrorOr<const FrameIndexEntry &> getFIEFor(const MCInst &Inst) const;
/// Remove all MCAnnotations attached by this pass
void cleanAnnotations(const BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs);
void cleanAnnotations();
~FrameAnalysis() {
cleanAnnotations();
}
/// Print to standard output statistics about the analysis performed by this

bolt/Passes/FrameOptimizer.cpp

@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===//
#include "FrameOptimizer.h"
#include "FrameAnalysis.h"
#include "ShrinkWrapping.h"
#include "StackAvailableExpressions.h"
#include "StackReachingUses.h"
@ -45,10 +44,11 @@ FrameOptimization("frame-opt",
namespace llvm {
namespace bolt {
void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA,
void FrameOptimizerPass::removeUnnecessaryLoads(const RegAnalysis &RA,
const FrameAnalysis &FA,
const BinaryContext &BC,
BinaryFunction &BF) {
StackAvailableExpressions SAE(FA, BC, BF);
StackAvailableExpressions SAE(RA, FA, BC, BF);
SAE.run();
DEBUG(dbgs() << "Performing unnecessary loads removal\n");
@ -71,7 +71,7 @@ void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA,
// if Inst is a load from stack and the current available expressions show
// this value is available in a register or immediate, replace this load
// with move from register or from immediate.
auto FIEX = FA.getFIEFor(BC, Inst);
auto FIEX = FA.getFIEFor(Inst);
if (!FIEX) {
Prev = &Inst;
continue;
@ -88,7 +88,7 @@ void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA,
for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB);
I != ExprEnd; ++I) {
const MCInst *AvailableInst = *I;
auto FIEY = FA.getFIEFor(BC, *AvailableInst);
auto FIEY = FA.getFIEFor(*AvailableInst);
if (!FIEY)
continue;
assert(FIEY->IsStore && FIEY->IsSimple);
@ -172,7 +172,7 @@ void FrameOptimizerPass::removeUnusedStores(const FrameAnalysis &FA,
(*I)->dump();
}
});
auto FIEX = FA.getFIEFor(BC, Inst);
auto FIEX = FA.getFIEFor(Inst);
if (!FIEX) {
Prev = &Inst;
continue;
@ -217,8 +217,9 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC,
return;
// Run FrameAnalysis pass
FrameAnalysis FA(PrintPass);
FA.runOnFunctions(BC, BFs, LargeFunctions);
BinaryFunctionCallGraph CG = buildCallGraph(BC, BFs);
FrameAnalysis FA(BC, BFs, CG);
RegAnalysis RA(BC, BFs, CG);
// Our main loop: perform caller-saved register optimizations, then
// callee-saved register optimizations (shrink wrapping).
@ -237,7 +238,7 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC,
}
{
NamedRegionTimer T1("remove loads", "FOP breakdown", true);
removeUnnecessaryLoads(FA, BC, I.second);
removeUnnecessaryLoads(RA, FA, BC, I.second);
}
{
NamedRegionTimer T1("remove stores", "FOP breakdown", true);
@ -248,14 +249,12 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC,
continue;
{
NamedRegionTimer T1("move spills", "FOP breakdown", true);
DataflowInfoManager Info(&FA, BC, I.second);
DataflowInfoManager Info(BC, I.second, &RA, &FA);
ShrinkWrapping SW(FA, BC, I.second, Info);
SW.perform();
}
}
FA.cleanAnnotations(BC, BFs);
outs() << "BOLT-INFO: FOP optimized " << NumRedundantLoads
<< " redundant load(s) and " << NumRedundantStores
<< " unused store(s)\n";

bolt/Passes/FrameOptimizer.h

@ -14,6 +14,7 @@
#include "BinaryPasses.h"
#include "FrameAnalysis.h"
#include "RegAnalysis.h"
namespace llvm {
namespace bolt {
@ -86,7 +87,8 @@ class FrameOptimizerPass : public BinaryFunctionPass {
/// Perform a dataflow analysis in \p BF to reveal unnecessary reloads from
/// the frame. Use the analysis to convert memory loads to register moves or
/// immediate loads. Delete redundant register moves.
void removeUnnecessaryLoads(const FrameAnalysis &FA,
void removeUnnecessaryLoads(const RegAnalysis &RA,
const FrameAnalysis &FA,
const BinaryContext &BC,
BinaryFunction &BF);

bolt/Passes/IndirectCallPromotion.cpp

@ -679,9 +679,12 @@ void IndirectCallPromotion::runOnFunctions(
if (opts::IndirectCallPromotion == ICP_NONE)
return;
FrameAnalysis FA(PrintPass, /*ClobberAnalysisOnly=*/true);
if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES)
FA.runOnFunctions(BC, BFs, LargeFunctions);
std::unique_ptr<RegAnalysis> RA;
std::unique_ptr<BinaryFunctionCallGraph> CG;
if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES) {
CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC, BFs)));
RA.reset(new RegAnalysis(BC, BFs, *CG));
}
for (auto &BFIt : BFs) {
auto &Function = BFIt.second;
@ -716,7 +719,7 @@ void IndirectCallPromotion::runOnFunctions(
if (BBs.empty())
continue;
DataflowInfoManager Info(&FA, BC, Function);
DataflowInfoManager Info(BC, Function, RA.get(), nullptr);
while (!BBs.empty()) {
auto *BB = BBs.back();
BBs.pop_back();
@ -864,9 +867,6 @@ void IndirectCallPromotion::runOnFunctions(
TotalIndirectJmps += FuncTotalIndirectJmps;
}
if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES)
FA.cleanAnnotations(BC, BFs);
outs() << "BOLT-INFO: ICP total indirect callsites = "
<< TotalIndirectCallsites
<< "\n"

bolt/Passes/LivenessAnalysis.h

@ -13,9 +13,14 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_LIVENESSANALYSIS_H
#include "DataflowAnalysis.h"
#include "FrameAnalysis.h"
#include "RegAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
namespace opts {
extern llvm::cl::opt<bool> AssumeABI;
}
namespace llvm {
namespace bolt {
@ -24,9 +29,9 @@ class LivenessAnalysis
friend class DataflowAnalysis<LivenessAnalysis, BitVector, true>;
public:
LivenessAnalysis(const FrameAnalysis &FA, const BinaryContext &BC,
LivenessAnalysis(const RegAnalysis &RA, const BinaryContext &BC,
BinaryFunction &BF)
: DataflowAnalysis<LivenessAnalysis, BitVector, true>(BC, BF), FA(FA),
: DataflowAnalysis<LivenessAnalysis, BitVector, true>(BC, BF), RA(RA),
NumRegs(BC.MRI->getNumRegs()) {}
virtual ~LivenessAnalysis();
@ -42,9 +47,21 @@ public:
DataflowAnalysis<LivenessAnalysis, BitVector, true>::run();
}
// Return a usable general-purpose reg after point P. Return 0 if no reg is
// available.
MCPhysReg scavengeRegAfter(ProgramPoint P) {
BitVector BV = *this->getStateAt(P);
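// Registers not live at P are dead there, hence safe to scavenge.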
BV.flip();
BitVector GPRegs(NumRegs, false);
this->BC.MIA->getGPRegs(GPRegs, *this->BC.MRI);
BV &= GPRegs;
int Reg = BV.find_first();
return Reg != -1 ? Reg : 0;
}
protected:
/// Reference to the result of stack frame analysis
const FrameAnalysis &FA;
/// Reference to the result of reg analysis
const RegAnalysis &RA;
const uint16_t NumRegs;
void preflight() {}
@ -63,18 +80,34 @@ protected:
BitVector computeNext(const MCInst &Point, const BitVector &Cur) {
BitVector Next = Cur;
bool IsCall = this->BC.MIA->isCall(Point);
// Kill
auto Written = BitVector(NumRegs, false);
if (this->BC.MIA->isCall(Point))
FA.getInstClobberList(this->BC, Point, Written);
else
if (!IsCall) {
this->BC.MIA->getWrittenRegs(Point, Written, *this->BC.MRI);
} else {
RA.getInstClobberList(Point, Written);
// When the clobber list is conservative, it clobbers all/most registers
// because the analysis knows nothing about this call. For our purposes,
// assume such a call kills only the callee-saved regs, since we don't
// really know what's going on.
if (RA.isConservative(Written)) {
Written.reset();
BC.MIA->getCalleeSavedRegs(Written, *this->BC.MRI);
}
}
Written.flip();
Next &= Written;
// Gen
if (!this->BC.MIA->isCFI(Point)) {
auto Used = BitVector(NumRegs, false);
this->BC.MIA->getUsedRegs(Point, Used, *this->BC.MRI);
RA.getInstUsedRegsList(Point, Used, /*GetClobbers*/false);
if (IsCall &&
(!BC.MIA->isTailCall(Point) || !BC.MIA->isConditionalBranch(Point))) {
// Never gen FLAGS from a call unless it is a conditional tail call,
// which reads FLAGS; assuming any other call reads them would be overly
// conservative
Used.reset(BC.MIA->getFlagsReg());
}
Next |= Used;
}
return Next;

bolt/Passes/ReachingDefOrUse.h

@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_REACHINGDEFORUSE_H
#include "DataflowAnalysis.h"
#include "RegAnalysis.h"
#include "llvm/Support/Timer.h"
namespace llvm {
@ -28,16 +29,16 @@ class ReachingDefOrUse
friend class DataflowAnalysis<ReachingDefOrUse<Def>, BitVector, !Def>;
public:
ReachingDefOrUse(const FrameAnalysis &FA, const BinaryContext &BC,
ReachingDefOrUse(const RegAnalysis &RA, const BinaryContext &BC,
BinaryFunction &BF)
: InstrsDataflowAnalysis<ReachingDefOrUse<Def>, !Def>(BC, BF), FA(FA) {}
: InstrsDataflowAnalysis<ReachingDefOrUse<Def>, !Def>(BC, BF), RA(RA) {}
virtual ~ReachingDefOrUse() {}
bool isReachedBy(MCPhysReg Reg, ExprIterator Candidates) {
for (auto I = Candidates; I != this->expr_end(); ++I) {
auto BV = BitVector(this->BC.MRI->getNumRegs(), false);
if (Def) {
FA.getInstClobberList(this->BC, **I, BV);
RA.getInstClobberList(**I, BV);
} else {
this->BC.MIA->getTouchedRegs(**I, BV, *this->BC.MRI);
}
@ -57,8 +58,8 @@ public:
}
protected:
/// Reference to the result of stack frame analysis
const FrameAnalysis &FA;
/// Reference to the result of reg analysis
const RegAnalysis &RA;
void preflight() {
// Populate our universe of tracked expressions with all instructions
@ -89,11 +90,11 @@ protected:
// getClobberedRegs for X and Y. If they intersect, return true
auto XClobbers = BitVector(this->BC.MRI->getNumRegs(), false);
auto YClobbers = BitVector(this->BC.MRI->getNumRegs(), false);
FA.getInstClobberList(this->BC, *X, XClobbers);
RA.getInstClobberList(*X, XClobbers);
// In defs, write after write -> kills first write
// In uses, write after access (read or write) -> kills access
if (Def)
FA.getInstClobberList(this->BC, *Y, YClobbers);
RA.getInstClobberList(*Y, YClobbers);
else
this->BC.MIA->getTouchedRegs(*Y, YClobbers, *this->BC.MRI);
// X kills Y if it clobbers Y completely -- this is a conservative approach.

bolt/Passes/RegAnalysis.cpp (new file, 207 lines)

@ -0,0 +1,207 @@
#include "RegAnalysis.h"
#include "CallGraphWalker.h"
#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "ra"
using namespace llvm;
namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltOptCategory;
cl::opt<bool> AssumeABI(
"assume-abi",
cl::desc("assume the ABI is never violated"),
cl::ZeroOrMore,
cl::init(false),
cl::cat(BoltOptCategory));
}
namespace llvm {
namespace bolt {
RegAnalysis::RegAnalysis(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunctionCallGraph &CG)
: BC(BC) {
CallGraphWalker CGWalker(BC, BFs, CG);
CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool {
BitVector RegsKilled = getFunctionClobberList(Func);
bool Updated = RegsKilledMap.find(Func) == RegsKilledMap.end() ||
RegsKilledMap[Func] != RegsKilled;
if (Updated)
RegsKilledMap[Func] = std::move(RegsKilled);
return Updated;
});
CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool {
BitVector RegsGen = getFunctionUsedRegsList(Func);
bool Updated = RegsGenMap.find(Func) == RegsGenMap.end() ||
RegsGenMap[Func] != RegsGen;
if (Updated)
RegsGenMap[Func] = std::move(RegsGen);
return Updated;
});
CGWalker.walk();
if (opts::Verbosity == 0) {
#ifndef NDEBUG
if (!DebugFlag || !isCurrentDebugType(DEBUG_TYPE))
return;
#else
return;
#endif
}
// This loop is for computing statistics only
for (auto &MapEntry : BFs) {
auto *Func = &MapEntry.second;
auto Iter = RegsKilledMap.find(Func);
assert(Iter != RegsKilledMap.end() &&
"Failed to compute all clobbers list");
if (Iter->second.all()) {
auto Count = Func->getExecutionCount();
if (Count != BinaryFunction::COUNT_NO_PROFILE)
CountFunctionsAllClobber += Count;
++NumFunctionsAllClobber;
}
DEBUG_WITH_TYPE("fa",
dbgs() << "Killed regs set for func: " << Func->getPrintName() << "\n";
const BitVector &RegsKilled = Iter->second;
int RegIdx = RegsKilled.find_first();
while (RegIdx != -1) {
dbgs() << "\tREG" << RegIdx;
RegIdx = RegsKilled.find_next(RegIdx);
};
dbgs() << "\nUsed regs set for func: " << Func->getPrintName() << "\n";
const BitVector &RegsUsed = RegsGenMap.find(Func)->second;
RegIdx = RegsUsed.find_first();
while (RegIdx != -1) {
dbgs() << "\tREG" << RegIdx;
RegIdx = RegsUsed.find_next(RegIdx);
};
dbgs() << "\n";
);
}
}
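// Note on -assume-abi: a "conservative" register set below means the
// complement of the callee-saved registers (everything the ABI permits a
// call to clobber); without the flag, it means all registers.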
void RegAnalysis::beConservative(BitVector &Result) const {
if (!opts::AssumeABI) {
Result.set();
} else {
BitVector BV(BC.MRI->getNumRegs(), false);
BC.MIA->getCalleeSavedRegs(BV, *BC.MRI);
BV.flip();
Result |= BV;
}
}
bool RegAnalysis::isConservative(BitVector &Vec) const {
if (!opts::AssumeABI) {
return Vec.all();
} else {
BitVector BV(BC.MRI->getNumRegs(), false);
BC.MIA->getCalleeSavedRegs(BV, *BC.MRI);
BV |= Vec;
return BV.all();
}
}
void RegAnalysis::getInstUsedRegsList(const MCInst &Inst, BitVector &RegSet,
bool GetClobbers) const {
if (!BC.MIA->isCall(Inst)) {
if (GetClobbers)
BC.MIA->getClobberedRegs(Inst, RegSet, *BC.MRI);
else
BC.MIA->getUsedRegs(Inst, RegSet, *BC.MRI);
return;
}
const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst);
// If indirect call, we know nothing
if (TargetSymbol == nullptr) {
beConservative(RegSet);
return;
}
const auto *Function = BC.getFunctionForSymbol(TargetSymbol);
if (Function == nullptr) {
// Call to a function without a BinaryFunction object.
// This should be a call to a PLT entry, and since it is a trampoline to
// a DSO, we can't really know the code in advance.
beConservative(RegSet);
return;
}
if (GetClobbers) {
auto BV = RegsKilledMap.find(Function);
if (BV != RegsKilledMap.end()) {
RegSet |= BV->second;
return;
}
// Ignore calls to functions whose clobber list wasn't yet calculated. This
// instruction will be evaluated again once we have info for the callee.
return;
}
auto BV = RegsGenMap.find(Function);
if (BV != RegsGenMap.end()) {
RegSet |= BV->second;
return;
}
}
void RegAnalysis::getInstClobberList(const MCInst &Inst,
BitVector &KillSet) const {
return getInstUsedRegsList(Inst, KillSet, /*GetClobbers*/ true);
}
BitVector RegAnalysis::getFunctionUsedRegsList(const BinaryFunction *Func) {
BitVector UsedRegs = BitVector(BC.MRI->getNumRegs(), false);
if (!Func->isSimple() || !Func->hasCFG()) {
beConservative(UsedRegs);
return UsedRegs;
}
for (const auto &BB : *Func) {
for (const auto &Inst : BB) {
getInstUsedRegsList(Inst, UsedRegs, /*GetClobbers*/false);
if (UsedRegs.all())
return UsedRegs;
}
}
return UsedRegs;
}
BitVector RegAnalysis::getFunctionClobberList(const BinaryFunction *Func) {
BitVector RegsKilled = BitVector(BC.MRI->getNumRegs(), false);
if (!Func->isSimple() || !Func->hasCFG()) {
beConservative(RegsKilled);
return RegsKilled;
}
for (const auto &BB : *Func) {
for (const auto &Inst : BB) {
getInstClobberList(Inst, RegsKilled);
if (RegsKilled.all())
return RegsKilled;
}
}
return RegsKilled;
}
void RegAnalysis::printStats() {
outs() << "BOLT-INFO REG ANALYSIS: Number of functions conservatively "
"treated as clobbering all registers: "
<< NumFunctionsAllClobber
<< format(" (%.1lf%% dyn cov)\n",
(100.0 * CountFunctionsAllClobber / CountDenominator));
}
}
}

bolt/Passes/RegAnalysis.h (new file, 82 lines)

@ -0,0 +1,82 @@
//===--- Passes/RegAnalysis.h ---------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_REGANALYSIS_H
#define LLVM_TOOLS_LLVM_BOLT_PASSES_REGANALYSIS_H
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "BinaryFunctionCallGraph.h"
#include "llvm/ADT/BitVector.h"
#include <map>
namespace llvm {
namespace bolt {
/// Determine the set of registers read or clobbered for each instruction
/// in a BinaryFunction. If the instruction is a call, this analysis relies on
/// a call graph traversal to accurately extract the set of registers touched
/// after the call returns.
class RegAnalysis {
BinaryContext &BC;
/// Map functions to the set of registers they may overwrite, from the point
/// they are called until they return to the caller.
std::map<const BinaryFunction *, BitVector> RegsKilledMap;
/// Similar to the above, but for registers that are read in that function.
std::map<const BinaryFunction *, BitVector> RegsGenMap;
/// Analysis stats counters
uint64_t NumFunctionsAllClobber{0};
uint64_t CountFunctionsAllClobber{0};
uint64_t CountDenominator{0};
/// Helper function used to get the set of clobbered/used regs whenever
/// we know nothing about the function.
void beConservative(BitVector &Result) const;
/// Compute the set of registers \p Func may read from during its execution.
BitVector getFunctionUsedRegsList(const BinaryFunction *Func);
/// Compute the set of registers \p Func may write to during its execution,
/// starting at the point when it is called up until when it returns. Returns
/// a BitVector the size of the target number of registers, representing the
/// set of clobbered registers.
BitVector getFunctionClobberList(const BinaryFunction *Func);
public:
RegAnalysis(BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs,
BinaryFunctionCallGraph &CG);
/// Compute the set of registers \p Inst may read from, marking them in
/// \p RegSet. If \p GetClobbers is true, compute instead the set of
/// registers \p Inst may write to.
/// Use the callgraph to fill out this info for calls.
void getInstUsedRegsList(const MCInst &Inst, BitVector &RegSet,
bool GetClobbers) const;
/// Compute the set of registers \p Inst may write to, marking them in
/// \p KillSet. If this is a call, try to get the set of registers the call
/// target will write to.
void getInstClobberList(const MCInst &Inst, BitVector &KillSet) const;
/// Return true iff Vec has a conservative estimation of used/clobbered regs,
/// expressing no specific knowledge of reg usage.
bool isConservative(BitVector &Vec) const;
/// Print stats about the quality of our analysis
void printStats();
};
}
}
#endif
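
A client-side sketch (mirroring FrameOptimizer.cpp earlier in this diff):

    BinaryFunctionCallGraph CG = buildCallGraph(BC, BFs);
    RegAnalysis RA(BC, BFs, CG);        // the CG walk runs in the constructor
    BitVector Regs(BC.MRI->getNumRegs(), false);
    RA.getInstClobberList(Inst, Regs);  // regs Inst may write; for direct
                                        // calls, uses the callee's summary
    if (RA.isConservative(Regs)) {
      // the analysis has no specific knowledge of this instruction's effects
    }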

bolt/Passes/ShrinkWrapping.cpp

@ -41,7 +41,7 @@ void CalleeSavedAnalysis::analyzeSaves() {
DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
const MCInst *Prev = nullptr;
for (auto &Inst : BB) {
if (auto FIE = FA.getFIEFor(BC, Inst)) {
if (auto FIE = FA.getFIEFor(Inst)) {
if (!FIE->IsStore || !FIE->IsSimple || !FIE->IsStoreFromReg ||
FIE->StackOffset >= 0) {
Prev = &Inst;
@ -86,7 +86,7 @@ void CalleeSavedAnalysis::analyzeRestores() {
const MCInst *Prev = nullptr;
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
auto &Inst = *I;
if (auto FIE = FA.getFIEFor(BC, Inst)) {
if (auto FIE = FA.getFIEFor(Inst)) {
if (!FIE->IsLoad || !FIE->IsSimple || !CalleeSaved[FIE->RegOrImm] ||
FIE->StackOffset >= 0) {
Prev = &Inst;
@ -229,7 +229,7 @@ void StackLayoutModifier::classifyStackAccesses() {
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
auto &Inst = *I;
checkFramePointerInitialization(Inst);
auto FIEX = FA.getFIEFor(BC, Inst);
auto FIEX = FA.getFIEFor(Inst);
if (!FIEX) {
Prev = &Inst;
continue;
@ -346,7 +346,7 @@ bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
if (!IsSimple || !BC.MIA->isPush(*DeletedPush))
return false;
auto FIE = FA.getFIEFor(BC, *DeletedPush);
auto FIE = FA.getFIEFor(*DeletedPush);
if (!FIE)
return false;
@ -370,7 +370,7 @@ bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
}
bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
auto FIE = FA.getFIEFor(BC, *DeletedPush);
auto FIE = FA.getFIEFor(*DeletedPush);
if (!FIE)
return false;
int64_t RegionAddr = FIE->StackOffset;
@ -414,7 +414,7 @@ bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
continue;
}
auto FIE = FA.getFIEFor(BC, Inst);
auto FIE = FA.getFIEFor(Inst);
assert(FIE);
if (FIE->StackPtrReg == BC.MIA->getStackPointer() && Slot < RegionAddr)
continue;
@ -499,7 +499,7 @@ bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
continue;
}
auto FIE = FA.getFIEFor(BC, Inst);
auto FIE = FA.getFIEFor(Inst);
assert(FIE);
if (FIE->StackPtrReg == BC.MIA->getStackPointer() && Slot < RegionAddr)
continue;

bolt/Passes/StackAvailableExpressions.cpp

@ -17,10 +17,11 @@
namespace llvm {
namespace bolt {
StackAvailableExpressions::StackAvailableExpressions(const FrameAnalysis &FA,
StackAvailableExpressions::StackAvailableExpressions(const RegAnalysis &RA,
const FrameAnalysis &FA,
const BinaryContext &BC,
BinaryFunction &BF)
: InstrsDataflowAnalysis(BC, BF), FA(FA) {}
: InstrsDataflowAnalysis(BC, BF), RA(RA), FA(FA) {}
void StackAvailableExpressions::preflight() {
DEBUG(dbgs() << "Starting StackAvailableExpressions on \""
@ -31,7 +32,7 @@ void StackAvailableExpressions::preflight() {
// program.
for (auto &BB : Func) {
for (auto &Inst : BB) {
auto FIE = FA.getFIEFor(BC, Inst);
auto FIE = FA.getFIEFor(Inst);
if (!FIE)
continue;
if (FIE->IsStore == true && FIE->IsSimple == true) {
@ -80,8 +81,8 @@ bool isLoadRedundant(const FrameIndexEntry &LoadFIE,
bool StackAvailableExpressions::doesXKillsY(const MCInst *X, const MCInst *Y) {
// if both are stores, and both store to the same stack location, return
// true
auto FIEX = FA.getFIEFor(BC, *X);
auto FIEY = FA.getFIEFor(BC, *Y);
auto FIEX = FA.getFIEFor(*X);
auto FIEY = FA.getFIEFor(*Y);
if (FIEX && FIEY) {
if (isLoadRedundant(*FIEX, *FIEY))
return false;
@ -93,14 +94,14 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst *X, const MCInst *Y) {
// getClobberedRegs for X and Y. If they intersect, return true
BitVector XClobbers = BitVector(BC.MRI->getNumRegs(), false);
BitVector YClobbers = BitVector(BC.MRI->getNumRegs(), false);
FA.getInstClobberList(BC, *X, XClobbers);
RA.getInstClobberList(*X, XClobbers);
// If Y is a store to stack, its clobber list is its source reg. This is
// different than the rest because we want to check if the store source
// reaches its corresponding load untouched.
if (FIEY && FIEY->IsStore == true && FIEY->IsStoreFromReg) {
YClobbers.set(FIEY->RegOrImm);
} else {
FA.getInstClobberList(BC, *Y, YClobbers);
RA.getInstClobberList(*Y, YClobbers);
}
XClobbers &= YClobbers;
return XClobbers.any();
@ -121,7 +122,7 @@ BitVector StackAvailableExpressions::computeNext(const MCInst &Point,
}
}
// Gen
if (auto FIE = FA.getFIEFor(BC, Point)) {
if (auto FIE = FA.getFIEFor(Point)) {
if (FIE->IsStore == true && FIE->IsSimple == true)
Next.set(ExprToIdx[&Point]);
}

bolt/Passes/StackAvailableExpressions.h

@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKAVAILABLEEXPRESSIONS_H
#include "DataflowAnalysis.h"
#include "RegAnalysis.h"
#include "llvm/Support/Timer.h"
namespace llvm {
@ -25,7 +26,7 @@ class StackAvailableExpressions
friend class DataflowAnalysis<StackAvailableExpressions, BitVector>;
public:
StackAvailableExpressions(const FrameAnalysis &FA,
StackAvailableExpressions(const RegAnalysis &RA, const FrameAnalysis &FA,
const BinaryContext &BC, BinaryFunction &BF);
virtual ~StackAvailableExpressions() {}
@ -35,7 +36,7 @@ public:
}
protected:
/// Reference to the result of stack frame analysis
const RegAnalysis &RA;
const FrameAnalysis &FA;
void preflight();

bolt/Passes/StackReachingUses.cpp

@ -22,7 +22,7 @@ bool StackReachingUses::isStoreUsed(const FrameIndexEntry &StoreFIE,
for (auto I = Candidates; I != expr_end(); ++I) {
const MCInst *ReachingInst = *I;
if (IncludeLocalAccesses) {
if (auto FIEY = FA.getFIEFor(BC, *ReachingInst)) {
if (auto FIEY = FA.getFIEFor(*ReachingInst)) {
assert(FIEY->IsLoad == 1);
if (StoreFIE.StackOffset + StoreFIE.Size > FIEY->StackOffset &&
StoreFIE.StackOffset < FIEY->StackOffset + FIEY->Size) {
@ -30,7 +30,7 @@ bool StackReachingUses::isStoreUsed(const FrameIndexEntry &StoreFIE,
}
}
}
auto Args = FA.getArgAccessesFor(BC, *ReachingInst);
auto Args = FA.getArgAccessesFor(*ReachingInst);
if (!Args)
continue;
if (Args->AssumeEverything) {
@ -55,14 +55,14 @@ void StackReachingUses::preflight() {
// program.
for (auto &BB : Func) {
for (auto &Inst : BB) {
if (auto FIE = FA.getFIEFor(BC, Inst)) {
if (auto FIE = FA.getFIEFor(Inst)) {
if (FIE->IsLoad == true) {
Expressions.push_back(&Inst);
ExprToIdx[&Inst] = NumInstrs++;
continue;
}
}
auto AA = FA.getArgAccessesFor(BC, Inst);
auto AA = FA.getArgAccessesFor(Inst);
if (AA && (!AA->Set.empty() || AA->AssumeEverything)) {
Expressions.push_back(&Inst);
ExprToIdx[&Inst] = NumInstrs++;
@ -74,8 +74,8 @@ void StackReachingUses::preflight() {
bool StackReachingUses::doesXKillsY(const MCInst *X, const MCInst *Y) {
// if X is a store to the same stack location and the bytes fetched is a
// superset of those bytes affected by the load in Y, return true
auto FIEX = FA.getFIEFor(BC, *X);
auto FIEY = FA.getFIEFor(BC, *Y);
auto FIEX = FA.getFIEFor(*X);
auto FIEY = FA.getFIEFor(*Y);
if (FIEX && FIEY) {
if (FIEX->IsStore == true && FIEY->IsLoad == true &&
FIEX->StackOffset <= FIEY->StackOffset &&
@ -98,11 +98,11 @@ BitVector StackReachingUses::computeNext(const MCInst &Point,
}
};
// Gen
if (auto FIE = FA.getFIEFor(BC, Point)) {
if (auto FIE = FA.getFIEFor(Point)) {
if (FIE->IsLoad == true)
Next.set(ExprToIdx[&Point]);
}
auto AA = FA.getArgAccessesFor(BC, Point);
auto AA = FA.getArgAccessesFor(Point);
if (AA && (!AA->Set.empty() || AA->AssumeEverything))
Next.set(ExprToIdx[&Point]);
return Next;