From 2c2309429905bcfa25fa0ebd50852c7fd2a7a637 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Fri, 2 Jun 2017 16:57:22 -0700 Subject: [PATCH] Split FrameAnalysis and improve LivenessAnalysis Summary: Split FrameAnalysis into FrameAnalysis and RegAnalysis, since some optimizations only require register information about functions, not frame information. Refactor callgraph walking code into the CallGraphWalker class, allowing any analysis that depend on the call graph to easily traverse it via a visitor pattern. Also fix LivenessAnalysis, which was broken because it was not considering registers read into callees and incorporating this into caller. (cherry picked from FBD5177901) --- bolt/Passes/BinaryFunctionCallGraph.cpp | 3 + bolt/Passes/CMakeLists.txt | 2 + bolt/Passes/CallGraphWalker.cpp | 46 +++++ bolt/Passes/CallGraphWalker.h | 67 +++++++ bolt/Passes/DataflowInfoManager.cpp | 12 +- bolt/Passes/DataflowInfoManager.h | 17 +- bolt/Passes/FrameAnalysis.cpp | 213 ++++------------------ bolt/Passes/FrameAnalysis.h | 93 +++------- bolt/Passes/FrameOptimizer.cpp | 23 ++- bolt/Passes/FrameOptimizer.h | 4 +- bolt/Passes/IndirectCallPromotion.cpp | 14 +- bolt/Passes/LivenessAnalysis.h | 51 +++++- bolt/Passes/ReachingDefOrUse.h | 15 +- bolt/Passes/RegAnalysis.cpp | 207 +++++++++++++++++++++ bolt/Passes/RegAnalysis.h | 82 +++++++++ bolt/Passes/ShrinkWrapping.cpp | 14 +- bolt/Passes/StackAvailableExpressions.cpp | 17 +- bolt/Passes/StackAvailableExpressions.h | 5 +- bolt/Passes/StackReachingUses.cpp | 16 +- 19 files changed, 584 insertions(+), 317 deletions(-) create mode 100644 bolt/Passes/CallGraphWalker.cpp create mode 100644 bolt/Passes/CallGraphWalker.h create mode 100644 bolt/Passes/RegAnalysis.cpp create mode 100644 bolt/Passes/RegAnalysis.h diff --git a/bolt/Passes/BinaryFunctionCallGraph.cpp b/bolt/Passes/BinaryFunctionCallGraph.cpp index 16ea4bc376dc..5d29cb64d9fd 100644 --- a/bolt/Passes/BinaryFunctionCallGraph.cpp +++ b/bolt/Passes/BinaryFunctionCallGraph.cpp 
@@ -12,6 +12,7 @@ #include "BinaryFunctionCallGraph.h" #include "BinaryFunction.h" #include "BinaryContext.h" +#include "llvm/Support/Timer.h" #define DEBUG_TYPE "callgraph" @@ -30,6 +31,7 @@ CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF, } std::deque BinaryFunctionCallGraph::buildTraversalOrder() { + NamedRegionTimer T1("Build cg traversal order", "CG breakdown", true); std::deque TopologicalOrder; enum NodeStatus { NEW, VISITING, VISITED }; std::vector NodeStatus(Funcs.size()); @@ -73,6 +75,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC, bool IncludeColdCalls, bool UseFunctionHotSize, bool UseEdgeCounts) { + NamedRegionTimer T1("Callgraph construction", "CG breakdown", true); BinaryFunctionCallGraph Cg; // Add call graph nodes. diff --git a/bolt/Passes/CMakeLists.txt b/bolt/Passes/CMakeLists.txt index 7d9714893c45..b3114c2a05e6 100644 --- a/bolt/Passes/CMakeLists.txt +++ b/bolt/Passes/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMBOLTPasses BinaryPasses.cpp BinaryFunctionCallGraph.cpp CallGraph.cpp + CallGraphWalker.cpp DataflowAnalysis.cpp DataflowInfoManager.cpp FrameAnalysis.cpp @@ -13,6 +14,7 @@ add_llvm_library(LLVMBOLTPasses Inliner.cpp LivenessAnalysis.cpp PettisAndHansen.cpp + RegAnalysis.cpp ReorderAlgorithm.cpp ReorderFunctions.cpp ShrinkWrapping.cpp diff --git a/bolt/Passes/CallGraphWalker.cpp b/bolt/Passes/CallGraphWalker.cpp new file mode 100644 index 000000000000..00f9d75a8dcd --- /dev/null +++ b/bolt/Passes/CallGraphWalker.cpp @@ -0,0 +1,46 @@ +#include "CallGraphWalker.h" +#include "llvm/Support/Timer.h" + +namespace llvm { +namespace bolt { + +void CallGraphWalker::traverseCG() { + NamedRegionTimer T1("CG Traversal", "CG breakdown", true); + std::queue Queue; + std::set InQueue; + + for (auto *Func : TopologicalCGOrder) { + Queue.push(Func); + InQueue.insert(Func); + } + + while (!Queue.empty()) { + auto *Func = Queue.front(); + Queue.pop(); + InQueue.erase(Func); + + bool Changed{false}; + for (auto 
Visitor : Visitors) { + bool CurVisit = Visitor(Func); + Changed = Changed || CurVisit; + } + + if (Changed) { + for (auto CallerID : CG.predecessors(CG.getNodeId(Func))) { + BinaryFunction *CallerFunc = CG.nodeIdToFunc(CallerID); + if (InQueue.count(CallerFunc)) + continue; + Queue.push(CallerFunc); + InQueue.insert(CallerFunc); + } + } + } +} + +void CallGraphWalker::walk() { + TopologicalCGOrder = CG.buildTraversalOrder(); + traverseCG(); +} + +} +} diff --git a/bolt/Passes/CallGraphWalker.h b/bolt/Passes/CallGraphWalker.h new file mode 100644 index 000000000000..195e536fd07d --- /dev/null +++ b/bolt/Passes/CallGraphWalker.h @@ -0,0 +1,67 @@ +//===--- Passes/CallGraphWalker.h -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPHWALKER_H +#define LLVM_TOOLS_LLVM_BOLT_PASSES_CALLGRAPHWALKER_H + +#include "BinaryContext.h" +#include "BinaryFunction.h" +#include "BinaryFunctionCallGraph.h" +#include +#include +#include +#include + +namespace llvm { +namespace bolt { + +/// Perform a bottom-up walk of the call graph with the intent of computing +/// a property that depends on callees. In the event of a CG cycles, this will +/// re-visit functions until their observed property converges. 
+class CallGraphWalker { + BinaryContext &BC; + std::map &BFs; + BinaryFunctionCallGraph &CG; + + /// DFS or reverse post-ordering of the call graph nodes to allow us to + /// traverse the call graph bottom-up + std::deque TopologicalCGOrder; + + /// Stores all visitor functions to call when traversing the call graph + typedef std::function CallbackTy; + std::vector Visitors; + + /// Do the bottom-up traversal + void traverseCG(); + +public: + /// Initialize core context references but don't do anything yet + CallGraphWalker(BinaryContext &BC, std::map &BFs, + BinaryFunctionCallGraph &CG) + : BC(BC), BFs(BFs), CG(CG) {} + + /// Register a new callback function to be called for each function when + /// traversing the call graph bottom-up. Function should return true iff + /// whatever information it is keeping track of has changed. Function must + /// converge with time, ie, it must eventually return false, otherwise the + /// call graph walk will never finish. + void registerVisitor(CallbackTy Callback) { + Visitors.emplace_back(Callback); + } + + /// Build the call graph, establish a traversal order and traverse it. 
+ void walk(); +}; + +} +} + +#endif diff --git a/bolt/Passes/DataflowInfoManager.cpp b/bolt/Passes/DataflowInfoManager.cpp index e280c1554b3d..c9a1e416db1a 100644 --- a/bolt/Passes/DataflowInfoManager.cpp +++ b/bolt/Passes/DataflowInfoManager.cpp @@ -18,8 +18,8 @@ namespace bolt { ReachingDefOrUse &DataflowInfoManager::getReachingDefs() { if (RD) return *RD; - assert(FA && "FrameAnalysis required"); - RD.reset(new ReachingDefOrUse(*FA, BC, BF)); + assert(RA && "RegAnalysis required"); + RD.reset(new ReachingDefOrUse(*RA, BC, BF)); RD->run(); return *RD; } @@ -31,8 +31,8 @@ void DataflowInfoManager::invalidateReachingDefs() { ReachingDefOrUse &DataflowInfoManager::getReachingUses() { if (RU) return *RU; - assert(FA && "FrameAnalysis required"); - RU.reset(new ReachingDefOrUse(*FA, BC, BF)); + assert(RA && "RegAnalysis required"); + RU.reset(new ReachingDefOrUse(*RA, BC, BF)); RU->run(); return *RU; } @@ -44,8 +44,8 @@ void DataflowInfoManager::invalidateReachingUses() { LivenessAnalysis &DataflowInfoManager::getLivenessAnalysis() { if (LA) return *LA; - assert(FA && "FrameAnalysis required"); - LA.reset(new LivenessAnalysis(*FA, BC, BF)); + assert(RA && "RegAnalysis required"); + LA.reset(new LivenessAnalysis(*RA, BC, BF)); LA->run(); return *LA; } diff --git a/bolt/Passes/DataflowInfoManager.h b/bolt/Passes/DataflowInfoManager.h index 34a6b64bef15..c527650d1d74 100644 --- a/bolt/Passes/DataflowInfoManager.h +++ b/bolt/Passes/DataflowInfoManager.h @@ -12,14 +12,15 @@ #ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_DATAFLOWINFOMANAGER_H #define LLVM_TOOLS_LLVM_BOLT_PASSES_DATAFLOWINFOMANAGER_H -#include "FrameAnalysis.h" -#include "ReachingDefOrUse.h" -#include "StackReachingUses.h" #include "DominatorAnalysis.h" -#include "StackPointerTracking.h" -#include "ReachingInsns.h" +#include "FrameAnalysis.h" #include "LivenessAnalysis.h" +#include "ReachingDefOrUse.h" +#include "ReachingInsns.h" +#include "RegAnalysis.h" #include "StackAllocationAnalysis.h" +#include 
"StackPointerTracking.h" +#include "StackReachingUses.h" namespace llvm { namespace bolt { @@ -29,6 +30,7 @@ namespace bolt { /// recompute it. Also provide an interface for data invalidation when the /// analysis is outdated after a transform pass modified the function. class DataflowInfoManager { + const RegAnalysis *RA; const FrameAnalysis *FA; const BinaryContext &BC; BinaryFunction &BF; @@ -46,8 +48,9 @@ class DataflowInfoManager { InsnToBB; public: - DataflowInfoManager(const FrameAnalysis *FA, const BinaryContext &BC, - BinaryFunction &BF) : FA(FA), BC(BC), BF(BF) {}; + DataflowInfoManager(const BinaryContext &BC, BinaryFunction &BF, + const RegAnalysis *RA, const FrameAnalysis *FA) + : RA(RA), FA(FA), BC(BC), BF(BF){}; /// Helper function to fetch the parent BB associated with a program point /// If PP is a BB itself, then return itself (cast to a BinaryBasicBlock) diff --git a/bolt/Passes/FrameAnalysis.cpp b/bolt/Passes/FrameAnalysis.cpp index 38d770ad679a..3cd2ce883b59 100644 --- a/bolt/Passes/FrameAnalysis.cpp +++ b/bolt/Passes/FrameAnalysis.cpp @@ -9,6 +9,7 @@ // //===----------------------------------------------------------------------===// #include "FrameAnalysis.h" +#include "CallGraphWalker.h" #include #define DEBUG_TYPE "fa" @@ -213,9 +214,8 @@ public: } // end anonymous namespace -void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst, - ArgAccesses &&AA) { - if (auto OldAA = getArgAccessesFor(BC, Inst)) { +void FrameAnalysis::addArgAccessesFor(MCInst &Inst, ArgAccesses &&AA) { + if (auto OldAA = getArgAccessesFor(Inst)) { if (OldAA->AssumeEverything) return; *OldAA = std::move(AA); @@ -231,13 +231,12 @@ void FrameAnalysis::addArgAccessesFor(const BinaryContext &BC, MCInst &Inst, ArgAccessesVector.emplace_back(std::move(AA)); } -void FrameAnalysis::addArgInStackAccessFor(const BinaryContext &BC, - MCInst &Inst, +void FrameAnalysis::addArgInStackAccessFor(MCInst &Inst, const ArgInStackAccess &Arg) { - auto AA = 
getArgAccessesFor(BC, Inst); + auto AA = getArgAccessesFor(Inst); if (!AA) { - addArgAccessesFor(BC, Inst, ArgAccesses(false)); - AA = getArgAccessesFor(BC, Inst); + addArgAccessesFor(Inst, ArgAccesses(false)); + AA = getArgAccessesFor(Inst); assert(AA && "Object setup failed"); } auto &Set = AA->Set; @@ -245,15 +244,13 @@ void FrameAnalysis::addArgInStackAccessFor(const BinaryContext &BC, Set.emplace(Arg); } -void FrameAnalysis::addFIEFor(const BinaryContext &BC, MCInst &Inst, - const FrameIndexEntry &FIE) { +void FrameAnalysis::addFIEFor(MCInst &Inst, const FrameIndexEntry &FIE) { BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "FrameAccessEntry", (unsigned)FIEVector.size()); FIEVector.emplace_back(FIE); } -ErrorOr -FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, const MCInst &Inst) { +ErrorOr FrameAnalysis::getArgAccessesFor(const MCInst &Inst) { if (auto Idx = BC.MIA->tryGetAnnotationAs(Inst, "ArgAccessEntry")) { assert(ArgAccessesVector.size() > *Idx && "Out of bounds"); return ArgAccessesVector[*Idx]; @@ -262,8 +259,7 @@ FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, const MCInst &Inst) { } ErrorOr -FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, - const MCInst &Inst) const { +FrameAnalysis::getArgAccessesFor(const MCInst &Inst) const { if (auto Idx = BC.MIA->tryGetAnnotationAs(Inst, "ArgAccessEntry")) { assert(ArgAccessesVector.size() > *Idx && "Out of bounds"); return ArgAccessesVector[*Idx]; @@ -272,7 +268,7 @@ FrameAnalysis::getArgAccessesFor(const BinaryContext &BC, } ErrorOr -FrameAnalysis::getFIEFor(const BinaryContext &BC, const MCInst &Inst) const { +FrameAnalysis::getFIEFor(const MCInst &Inst) const { if (auto Idx = BC.MIA->tryGetAnnotationAs(Inst, "FrameAccessEntry")) { assert(FIEVector.size() > *Idx && "Out of bounds"); @@ -281,130 +277,17 @@ FrameAnalysis::getFIEFor(const BinaryContext &BC, const MCInst &Inst) const { return make_error_code(errc::result_out_of_range); } -void FrameAnalysis::getInstClobberList(const 
BinaryContext &BC, - const MCInst &Inst, - BitVector &KillSet) const { - if (!BC.MIA->isCall(Inst)) { - BC.MIA->getClobberedRegs(Inst, KillSet, *BC.MRI); - return; - } +void FrameAnalysis::traverseCG(BinaryFunctionCallGraph &CG) { + CallGraphWalker CGWalker(BC, BFs, CG); - const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst); - // If indirect call, kill set should have all elements - if (TargetSymbol == nullptr) { - KillSet.set(0, KillSet.size()); - return; - } + CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool { + return computeArgsAccessed(*Func); + }); - const auto *Function = BC.getFunctionForSymbol(TargetSymbol); - if (Function == nullptr) { - // Call to a function without a BinaryFunction object. - // This should be a call to a PLT entry, and since it is a trampoline to - // a DSO, we can't really know the code in advance. Conservatively assume - // everything is clobbered. - KillSet.set(0, KillSet.size()); - return; - } - auto BV = RegsKilledMap.find(Function); - if (BV != RegsKilledMap.end()) { - KillSet |= BV->second; - return; - } - // Ignore calls to function whose clobber list wasn't yet calculated. This - // instruction will be evaluated again once we have info for the callee. 
- return; + CGWalker.walk(); } -BitVector FrameAnalysis::getFunctionClobberList(const BinaryContext &BC, - const BinaryFunction *Func) { - BitVector RegsKilled = BitVector(BC.MRI->getNumRegs(), false); - - if (!Func->isSimple() || !Func->hasCFG()) { - RegsKilled.set(0, RegsKilled.size()); - return RegsKilled; - } - - for (const auto &BB : *Func) { - for (const auto &Inst : BB) { - getInstClobberList(BC, Inst, RegsKilled); - } - } - - return RegsKilled; -} - -void FrameAnalysis::buildClobberMap(const BinaryContext &BC) { - std::queue Queue; - std::set InQueue; - - for (auto *Func : TopologicalCGOrder) { - Queue.push(Func); - InQueue.insert(Func); - } - - while (!Queue.empty()) { - auto *Func = Queue.front(); - Queue.pop(); - InQueue.erase(Func); - - BitVector RegsKilled = getFunctionClobberList(BC, Func); - bool ArgsUpdated = ClobberAnalysisOnly ? false : computeArgsAccessed(BC, *Func); - bool RegsUpdated = false; - - if (RegsKilledMap.find(Func) == RegsKilledMap.end()) { - RegsKilledMap[Func] = std::move(RegsKilled); - } else { - RegsUpdated = RegsKilledMap[Func] != RegsKilled; - if (RegsUpdated) - RegsKilledMap[Func] = std::move(RegsKilled); - } - - if (RegsUpdated || ArgsUpdated) { - for (auto Caller : Cg.predecessors(Cg.getNodeId(Func))) { - BinaryFunction *CallerFunc = Cg.nodeIdToFunc(Caller); - if (!InQueue.count(CallerFunc)) { - InQueue.insert(CallerFunc); - Queue.push(CallerFunc); - } - } - } - } - - if (opts::Verbosity == 0) { -#ifndef NDEBUG - if (!DebugFlag || !isCurrentDebugType("fa")) - return; -#else - return; -#endif - } - - // This loop is for computing statistics only - for (auto *Func : TopologicalCGOrder) { - auto Iter = RegsKilledMap.find(Func); - assert(Iter != RegsKilledMap.end() && - "Failed to compute all clobbers list"); - if (Iter->second.all()) { - auto Count = Func->getExecutionCount(); - if (Count != BinaryFunction::COUNT_NO_PROFILE) - CountFunctionsAllClobber += Count; - ++NumFunctionsAllClobber; - } - DEBUG_WITH_TYPE("fa", - dbgs() << 
"Killed regs set for func: " << Func->getPrintName() << "\n"; - const BitVector &RegsKilled = Iter->second; - int RegIdx = RegsKilled.find_first(); - while (RegIdx != -1) { - dbgs() << "\tREG" << RegIdx; - RegIdx = RegsKilled.find_next(RegIdx); - }; - dbgs() << "\n"; - ); - } -} - -bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC, - const BinaryFunction &BF, MCInst &Inst, +bool FrameAnalysis::updateArgsTouchedFor(const BinaryFunction &BF, MCInst &Inst, int CurOffset) { if (!BC.MIA->isCall(Inst)) return false; @@ -413,7 +296,7 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC, const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst); // If indirect call, we conservatively assume it accesses all stack positions if (TargetSymbol == nullptr) { - addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true)); + addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true)); bool Updated{false}; if (!FunctionsRequireAlignment.count(&BF)) { Updated = true; @@ -426,7 +309,7 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC, // Call to a function without a BinaryFunction object. 
Conservatively assume // it accesses all stack positions if (Function == nullptr) { - addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true)); + addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true)); bool Updated{false}; if (!FunctionsRequireAlignment.count(&BF)) { Updated = true; @@ -459,27 +342,25 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryContext &BC, if (CurOffset == StackPointerTracking::EMPTY || CurOffset == StackPointerTracking::SUPERPOSITION) { - addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true)); + addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true)); return Changed; } for (auto Elem : Iter->second) { if (Elem.first == -1) { - addArgAccessesFor(BC, Inst, ArgAccesses(/*AssumeEverything=*/true)); + addArgAccessesFor(Inst, ArgAccesses(/*AssumeEverything=*/true)); break; } DEBUG(dbgs() << "Added arg in stack access annotation " << CurOffset + Elem.first << "\n"); addArgInStackAccessFor( - BC, Inst, - ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first, - /*Size=*/Elem.second}); + Inst, ArgInStackAccess{/*StackOffset=*/CurOffset + Elem.first, + /*Size=*/Elem.second}); } return Changed; } -bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC, - BinaryFunction &BF) { +bool FrameAnalysis::computeArgsAccessed(BinaryFunction &BF) { if (!BF.isSimple() || !BF.hasCFG()) { DEBUG(dbgs() << "Treating " << BF.getPrintName() << " conservatively.\n"); bool Updated = false; @@ -505,7 +386,7 @@ bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC, // Check for calls -- attach stack accessing info to them regarding their // target - if (updateArgsTouchedFor(BC, BF, Inst, FAA.getSPOffset())) + if (updateArgsTouchedFor(BF, Inst, FAA.getSPOffset())) UpdatedArgsTouched = true; // Check for stack accesses that affect callers @@ -548,8 +429,7 @@ bool FrameAnalysis::computeArgsAccessed(const BinaryContext &BC, return UpdatedArgsTouched || UpdatedAlignedStatus; } -bool 
FrameAnalysis::restoreFrameIndex(const BinaryContext &BC, - BinaryFunction &BF) { +bool FrameAnalysis::restoreFrameIndex(BinaryFunction &BF) { FrameAccessAnalysis FAA(BC, BF); DEBUG(dbgs() << "Restoring frame indices for \"" << BF.getPrintName() @@ -572,7 +452,7 @@ bool FrameAnalysis::restoreFrameIndex(const BinaryContext &BC, const FrameIndexEntry &FIE = FAA.getFIE(); - addFIEFor(BC, Inst, FIE); + addFIEFor(Inst, FIE); DEBUG({ dbgs() << "Frame index annotation " << FIE << " added to:\n"; BC.printInstruction(dbgs(), Inst, 0, &BF, true); @@ -582,8 +462,7 @@ bool FrameAnalysis::restoreFrameIndex(const BinaryContext &BC, return true; } -void FrameAnalysis::cleanAnnotations(const BinaryContext &BC, - std::map &BFs) { +void FrameAnalysis::cleanAnnotations() { for (auto &I : BFs) { for (auto &BB : I.second) { for (auto &Inst : BB) { @@ -594,24 +473,15 @@ void FrameAnalysis::cleanAnnotations(const BinaryContext &BC, } } -void FrameAnalysis::runOnFunctions(BinaryContext &BC, - std::map &BFs, - std::set &) { - { - NamedRegionTimer T1("Callgraph construction", "FOP breakdown", true); - Cg = buildCallGraph(BC, BFs); - } - { - NamedRegionTimer T1("build cg traversal order", "FOP breakdown", true); - TopologicalCGOrder = Cg.buildTraversalOrder(); - } - { - NamedRegionTimer T1("build clobber map", "FOP breakdown", true); - buildClobberMap(BC); - } +FrameAnalysis::FrameAnalysis(BinaryContext &BC, + std::map &BFs, + BinaryFunctionCallGraph &CG) + : BC(BC), BFs(BFs) { + // Position 0 of the vector should be always associated with "assume access + // everything". 
+ ArgAccessesVector.emplace_back(ArgAccesses(/*AssumeEverything*/ true)); - if (ClobberAnalysisOnly) - return; + traverseCG(CG); for (auto &I : BFs) { auto Count = I.second.getExecutionCount(); @@ -630,7 +500,7 @@ void FrameAnalysis::runOnFunctions(BinaryContext &BC, { NamedRegionTimer T1("restore frame index", "FOP breakdown", true); - if (!restoreFrameIndex(BC, I.second)) { + if (!restoreFrameIndex(I.second)) { ++NumFunctionsFailedRestoreFI; auto Count = I.second.getExecutionCount(); if (Count != BinaryFunction::COUNT_NO_PROFILE) @@ -643,12 +513,7 @@ void FrameAnalysis::runOnFunctions(BinaryContext &BC, } void FrameAnalysis::printStats() { - outs() << "BOLT-INFO FRAME ANALYSIS: Number of functions conservatively " - "treated as clobbering all registers: " - << NumFunctionsAllClobber - << format(" (%.1lf%% dyn cov)\n", - (100.0 * CountFunctionsAllClobber / CountDenominator)) - << "BOLT-INFO FRAME ANALYSIS: " << NumFunctionsNotOptimized + outs() << "BOLT-INFO FRAME ANALYSIS: " << NumFunctionsNotOptimized << " function(s) " << format("(%.1lf%% dyn cov)", (100.0 * CountFunctionsNotOptimized / CountDenominator)) diff --git a/bolt/Passes/FrameAnalysis.h b/bolt/Passes/FrameAnalysis.h index b182d84bcb78..69c188c2e2e3 100644 --- a/bolt/Passes/FrameAnalysis.h +++ b/bolt/Passes/FrameAnalysis.h @@ -12,8 +12,8 @@ #ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEANALYSIS_H #define LLVM_TOOLS_LLVM_BOLT_PASSES_FRAMEANALYSIS_H -#include "BinaryPasses.h" #include "BinaryFunctionCallGraph.h" +#include "BinaryPasses.h" #include "StackPointerTracking.h" namespace llvm { @@ -111,17 +111,9 @@ raw_ostream &operator<<(raw_ostream &OS, /// ... 
callee may access any position of our current stack frame /// } /// -class FrameAnalysis : public BinaryFunctionPass { - /// Call graph info - BinaryFunctionCallGraph Cg; - - /// DFS or reverse post-ordering of the call graph nodes to allow us to - /// traverse the call graph bottom-up - std::deque TopologicalCGOrder; - - /// Map functions to the set of registers they may overwrite starting at when - /// it is called until it returns to the caller. - std::map RegsKilledMap; +class FrameAnalysis { + BinaryContext &BC; + std::map &BFs; /// Map functions to the set of tuples representing /// accesses to stack positions that belongs to caller @@ -142,70 +134,44 @@ class FrameAnalysis : public BinaryFunctionPass { std::vector FIEVector; /// Analysis stats counters - uint64_t NumFunctionsAllClobber{0}; - uint64_t CountFunctionsAllClobber{0}; uint64_t NumFunctionsNotOptimized{0}; uint64_t NumFunctionsFailedRestoreFI{0}; uint64_t CountFunctionsNotOptimized{0}; uint64_t CountFunctionsFailedRestoreFI{0}; uint64_t CountDenominator{0}; - /// If this flag is set to true, the analysis will never run completely, - /// but will stop after callgraph and a clobber analysis for every function - /// has been computed. - bool ClobberAnalysisOnly{false}; - /// Convenience functions for appending MCAnnotations to instructions with /// our specific data - void addArgAccessesFor(const BinaryContext &BC, MCInst &Inst, - ArgAccesses &&AA); - void addArgInStackAccessFor(const BinaryContext &BC, MCInst &Inst, - const ArgInStackAccess &Arg); - void addFIEFor(const BinaryContext &BC, MCInst &Inst, - const FrameIndexEntry &FIE); - - /// Compute the set of registers \p Func may write to during its execution, - /// starting at the point when it is called up until when it returns. Returns - /// a BitVector the size of the target number of registers, representing the - /// set of clobbered registers. 
- BitVector getFunctionClobberList(const BinaryContext &BC, - const BinaryFunction *Func); + void addArgAccessesFor(MCInst &Inst, ArgAccesses &&AA); + void addArgInStackAccessFor(MCInst &Inst, const ArgInStackAccess &Arg); + void addFIEFor(MCInst &Inst, const FrameIndexEntry &FIE); /// Perform the step of building the set of registers clobbered by each - /// function execution, populating RegsKilledMap. - void buildClobberMap(const BinaryContext &BC); + /// function execution, populating RegsKilledMap and RegsGenMap. + void traverseCG(BinaryFunctionCallGraph &CG); /// Analyzes an instruction and if it is a call, checks the called function /// to record which args in stack are accessed, if any. Returns true if /// the args data associated with this instruction were updated. - bool updateArgsTouchedFor(const BinaryContext &BC, const BinaryFunction &BF, - MCInst &Inst, int CurOffset); + bool updateArgsTouchedFor(const BinaryFunction &BF, MCInst &Inst, + int CurOffset); /// Performs a pass over \p BF to check for accesses to arguments in stack, /// flagging those as accessing the caller stack frame. All functions called /// by \p BF must have been previously analyzed. Returns true if updated /// args data about this function. - bool computeArgsAccessed(const BinaryContext &BC, BinaryFunction &BF); + bool computeArgsAccessed(BinaryFunction &BF); /// Alias analysis to disambiguate which frame position is accessed by each /// instruction in function \p BF. Add MCAnnotation to /// instructions that access a frame position. Return false if it failed /// to analyze and this information can't be safely determined for \p BF. 
- bool restoreFrameIndex(const BinaryContext &BC, BinaryFunction &BF); + bool restoreFrameIndex(BinaryFunction &BF); public: - explicit FrameAnalysis(const cl::opt &PrintPass, - bool ClobberAnalysisOnly=false) - : BinaryFunctionPass(PrintPass), - ClobberAnalysisOnly(ClobberAnalysisOnly) { - // Position 0 of the vector should be always associated with "assume access - // everything". - ArgAccessesVector.emplace_back(ArgAccesses(/*AssumeEverything*/ true)); - } - - const char *getName() const override { - return "frame-analysis"; - } + explicit FrameAnalysis(BinaryContext &BC, + std::map &BFs, + BinaryFunctionCallGraph &CG); /// Return true if we could fully analyze \p Func bool hasFrameInfo(const BinaryFunction &Func) const { @@ -217,30 +183,19 @@ public: return FunctionsRequireAlignment.count(&Func); } - /// Compute the set of registers \p Inst may write to, marking them in - /// \p KillSet. If this is a call, try to get the set of registers the call - /// target will write to. - void getInstClobberList(const BinaryContext &BC, const MCInst &Inst, - BitVector &KillSet) const; - /// Functions for retrieving our specific MCAnnotation data from instructions - ErrorOr getArgAccessesFor(const BinaryContext &BC, - const MCInst &Inst); + ErrorOr getArgAccessesFor(const MCInst &Inst); - ErrorOr getArgAccessesFor(const BinaryContext &BC, - const MCInst &Inst) const; + ErrorOr getArgAccessesFor(const MCInst &Inst) const; - ErrorOr getFIEFor(const BinaryContext &BC, - const MCInst &Inst) const; - - /// Pass entry point - void runOnFunctions(BinaryContext &BC, - std::map &BFs, - std::set &LargeFunctions) override; + ErrorOr getFIEFor(const MCInst &Inst) const; /// Remove all MCAnnotations attached by this pass - void cleanAnnotations(const BinaryContext &BC, - std::map &BFs); + void cleanAnnotations(); + + ~FrameAnalysis() { + cleanAnnotations(); + } /// Print to standard output statistics about the analysis performed by this diff --git a/bolt/Passes/FrameOptimizer.cpp 
b/bolt/Passes/FrameOptimizer.cpp index 4662cf87515b..094e668f419e 100644 --- a/bolt/Passes/FrameOptimizer.cpp +++ b/bolt/Passes/FrameOptimizer.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include "FrameOptimizer.h" -#include "FrameAnalysis.h" #include "ShrinkWrapping.h" #include "StackAvailableExpressions.h" #include "StackReachingUses.h" @@ -45,10 +44,11 @@ FrameOptimization("frame-opt", namespace llvm { namespace bolt { -void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA, +void FrameOptimizerPass::removeUnnecessaryLoads(const RegAnalysis &RA, + const FrameAnalysis &FA, const BinaryContext &BC, BinaryFunction &BF) { - StackAvailableExpressions SAE(FA, BC, BF); + StackAvailableExpressions SAE(RA, FA, BC, BF); SAE.run(); DEBUG(dbgs() << "Performing unnecessary loads removal\n"); @@ -71,7 +71,7 @@ void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA, // if Inst is a load from stack and the current available expressions show // this value is available in a register or immediate, replace this load // with move from register or from immediate. - auto FIEX = FA.getFIEFor(BC, Inst); + auto FIEX = FA.getFIEFor(Inst); if (!FIEX) { Prev = &Inst; continue; @@ -88,7 +88,7 @@ void FrameOptimizerPass::removeUnnecessaryLoads(const FrameAnalysis &FA, for (auto I = Prev ? 
SAE.expr_begin(*Prev) : SAE.expr_begin(BB); I != ExprEnd; ++I) { const MCInst *AvailableInst = *I; - auto FIEY = FA.getFIEFor(BC, *AvailableInst); + auto FIEY = FA.getFIEFor(*AvailableInst); if (!FIEY) continue; assert(FIEY->IsStore && FIEY->IsSimple); @@ -172,7 +172,7 @@ void FrameOptimizerPass::removeUnusedStores(const FrameAnalysis &FA, (*I)->dump(); } }); - auto FIEX = FA.getFIEFor(BC, Inst); + auto FIEX = FA.getFIEFor(Inst); if (!FIEX) { Prev = &Inst; continue; @@ -217,8 +217,9 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC, return; // Run FrameAnalysis pass - FrameAnalysis FA(PrintPass); - FA.runOnFunctions(BC, BFs, LargeFunctions); + BinaryFunctionCallGraph CG = buildCallGraph(BC, BFs); + FrameAnalysis FA(BC, BFs, CG); + RegAnalysis RA(BC, BFs, CG); // Our main loop: perform caller-saved register optimizations, then // callee-saved register optimizations (shrink wrapping). @@ -237,7 +238,7 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC, } { NamedRegionTimer T1("remove loads", "FOP breakdown", true); - removeUnnecessaryLoads(FA, BC, I.second); + removeUnnecessaryLoads(RA, FA, BC, I.second); } { NamedRegionTimer T1("remove stores", "FOP breakdown", true); @@ -248,14 +249,12 @@ void FrameOptimizerPass::runOnFunctions(BinaryContext &BC, continue; { NamedRegionTimer T1("move spills", "FOP breakdown", true); - DataflowInfoManager Info(&FA, BC, I.second); + DataflowInfoManager Info(BC, I.second, &RA, &FA); ShrinkWrapping SW(FA, BC, I.second, Info); SW.perform(); } } - FA.cleanAnnotations(BC, BFs); - outs() << "BOLT-INFO: FOP optimized " << NumRedundantLoads << " redundant load(s) and " << NumRedundantStores << " unused store(s)\n"; diff --git a/bolt/Passes/FrameOptimizer.h b/bolt/Passes/FrameOptimizer.h index 4ba8e1c2bb56..3c6e3bee168a 100644 --- a/bolt/Passes/FrameOptimizer.h +++ b/bolt/Passes/FrameOptimizer.h @@ -14,6 +14,7 @@ #include "BinaryPasses.h" #include "FrameAnalysis.h" +#include "RegAnalysis.h" namespace llvm { namespace bolt 
{ @@ -86,7 +87,8 @@ class FrameOptimizerPass : public BinaryFunctionPass { /// Perform a dataflow analysis in \p BF to reveal unnecessary reloads from /// the frame. Use the analysis to convert memory loads to register moves or /// immediate loads. Delete redundant register moves. - void removeUnnecessaryLoads(const FrameAnalysis &FA, + void removeUnnecessaryLoads(const RegAnalysis &RA, + const FrameAnalysis &FA, const BinaryContext &BC, BinaryFunction &BF); diff --git a/bolt/Passes/IndirectCallPromotion.cpp b/bolt/Passes/IndirectCallPromotion.cpp index 7d7311347d6a..b2e54906db1b 100644 --- a/bolt/Passes/IndirectCallPromotion.cpp +++ b/bolt/Passes/IndirectCallPromotion.cpp @@ -679,9 +679,12 @@ void IndirectCallPromotion::runOnFunctions( if (opts::IndirectCallPromotion == ICP_NONE) return; - FrameAnalysis FA(PrintPass, /*ClobberAnalysisOnly=*/true); - if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES) - FA.runOnFunctions(BC, BFs, LargeFunctions); + std::unique_ptr RA; + std::unique_ptr CG; + if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES) { + CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC, BFs))); + RA.reset(new RegAnalysis(BC, BFs, *CG)); + } for (auto &BFIt : BFs) { auto &Function = BFIt.second; @@ -716,7 +719,7 @@ void IndirectCallPromotion::runOnFunctions( if (BBs.empty()) continue; - DataflowInfoManager Info(&FA, BC, Function); + DataflowInfoManager Info(BC, Function, RA.get(), nullptr); while (!BBs.empty()) { auto *BB = BBs.back(); BBs.pop_back(); @@ -864,9 +867,6 @@ void IndirectCallPromotion::runOnFunctions( TotalIndirectJmps += FuncTotalIndirectJmps; } - if (opts::IndirectCallPromotion >= ICP_JUMP_TABLES) - FA.cleanAnnotations(BC, BFs); - outs() << "BOLT-INFO: ICP total indirect callsites = " << TotalIndirectCallsites << "\n" diff --git a/bolt/Passes/LivenessAnalysis.h b/bolt/Passes/LivenessAnalysis.h index ed9e0f00a1e2..739f49150f4d 100644 --- a/bolt/Passes/LivenessAnalysis.h +++ b/bolt/Passes/LivenessAnalysis.h @@ -13,9 +13,14 @@ #define 
LLVM_TOOLS_LLVM_BOLT_PASSES_LIVENESSANALYSIS_H #include "DataflowAnalysis.h" -#include "FrameAnalysis.h" +#include "RegAnalysis.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Timer.h" +namespace opts { +extern llvm::cl::opt AssumeABI; +} + namespace llvm { namespace bolt { @@ -24,9 +29,9 @@ class LivenessAnalysis friend class DataflowAnalysis; public: - LivenessAnalysis(const FrameAnalysis &FA, const BinaryContext &BC, + LivenessAnalysis(const RegAnalysis &RA, const BinaryContext &BC, BinaryFunction &BF) - : DataflowAnalysis(BC, BF), FA(FA), + : DataflowAnalysis(BC, BF), RA(RA), NumRegs(BC.MRI->getNumRegs()) {} virtual ~LivenessAnalysis(); @@ -42,9 +47,21 @@ public: DataflowAnalysis::run(); } + // Return a usable general-purpose reg after point P. Return 0 if no reg is + // available. + MCPhysReg scavengeRegAfter(ProgramPoint P) { + BitVector BV = *this->getStateAt(P); + BV.flip(); + BitVector GPRegs(NumRegs, false); + this->BC.MIA->getGPRegs(GPRegs, *this->BC.MRI); + BV &= GPRegs; + int Reg = BV.find_first(); + return Reg != -1 ? Reg : 0; + } + protected: - /// Reference to the result of stack frame analysis - const FrameAnalysis &FA; + /// Reference to the result of reg analysis + const RegAnalysis &RA; const uint16_t NumRegs; void preflight() {} @@ -63,18 +80,34 @@ protected: BitVector computeNext(const MCInst &Point, const BitVector &Cur) { BitVector Next = Cur; + bool IsCall = this->BC.MIA->isCall(Point); // Kill auto Written = BitVector(NumRegs, false); - if (this->BC.MIA->isCall(Point)) - FA.getInstClobberList(this->BC, Point, Written); - else + if (!IsCall) { this->BC.MIA->getWrittenRegs(Point, Written, *this->BC.MRI); + } else { + RA.getInstClobberList(Point, Written); + // When clobber list is conservative, it is clobbering all/most registers, + // a conservative estimate because it knows nothing about this call. + // For our purposes, assume it kills no registers/callee-saved regs + // because we don't really know what's going on. 
+ if (RA.isConservative(Written)) { + Written.reset(); + BC.MIA->getCalleeSavedRegs(Written, *this->BC.MRI); + } + } Written.flip(); Next &= Written; // Gen if (!this->BC.MIA->isCFI(Point)) { auto Used = BitVector(NumRegs, false); - this->BC.MIA->getUsedRegs(Point, Used, *this->BC.MRI); + RA.getInstUsedRegsList(Point, Used, /*GetClobbers*/false); + if (IsCall && + (!BC.MIA->isTailCall(Point) || !BC.MIA->isConditionalBranch(Point))) { + // Never gen FLAGS from a non-conditional call... this is overly + // conservative + Used.reset(BC.MIA->getFlagsReg()); + } Next |= Used; } return Next; diff --git a/bolt/Passes/ReachingDefOrUse.h b/bolt/Passes/ReachingDefOrUse.h index 9b5f8695b3f1..8d11ec0d9c5c 100644 --- a/bolt/Passes/ReachingDefOrUse.h +++ b/bolt/Passes/ReachingDefOrUse.h @@ -13,6 +13,7 @@ #define LLVM_TOOLS_LLVM_BOLT_PASSES_REACHINGDEFORUSE_H #include "DataflowAnalysis.h" +#include "RegAnalysis.h" #include "llvm/Support/Timer.h" namespace llvm { @@ -28,16 +29,16 @@ class ReachingDefOrUse friend class DataflowAnalysis, BitVector, !Def>; public: - ReachingDefOrUse(const FrameAnalysis &FA, const BinaryContext &BC, + ReachingDefOrUse(const RegAnalysis &RA, const BinaryContext &BC, BinaryFunction &BF) - : InstrsDataflowAnalysis, !Def>(BC, BF), FA(FA) {} + : InstrsDataflowAnalysis, !Def>(BC, BF), RA(RA) {} virtual ~ReachingDefOrUse() {} bool isReachedBy(MCPhysReg Reg, ExprIterator Candidates) { for (auto I = Candidates; I != this->expr_end(); ++I) { auto BV = BitVector(this->BC.MRI->getNumRegs(), false); if (Def) { - FA.getInstClobberList(this->BC, **I, BV); + RA.getInstClobberList(**I, BV); } else { this->BC.MIA->getTouchedRegs(**I, BV, *this->BC.MRI); } @@ -57,8 +58,8 @@ public: } protected: - /// Reference to the result of stack frame analysis - const FrameAnalysis &FA; + /// Reference to the result of reg analysis + const RegAnalysis &RA; void preflight() { // Populate our universe of tracked expressions with all instructions @@ -89,11 +90,11 @@ protected: // 
getClobberedRegs for X and Y. If they intersect, return true auto XClobbers = BitVector(this->BC.MRI->getNumRegs(), false); auto YClobbers = BitVector(this->BC.MRI->getNumRegs(), false); - FA.getInstClobberList(this->BC, *X, XClobbers); + RA.getInstClobberList(*X, XClobbers); // In defs, write after write -> kills first write // In uses, write after access (read or write) -> kills access if (Def) - FA.getInstClobberList(this->BC, *Y, YClobbers); + RA.getInstClobberList(*Y, YClobbers); else this->BC.MIA->getTouchedRegs(*Y, YClobbers, *this->BC.MRI); // X kills Y if it clobbers Y completely -- this is a conservative approach. diff --git a/bolt/Passes/RegAnalysis.cpp b/bolt/Passes/RegAnalysis.cpp new file mode 100644 index 000000000000..b17ada273daf --- /dev/null +++ b/bolt/Passes/RegAnalysis.cpp @@ -0,0 +1,207 @@ +#include "RegAnalysis.h" +#include "CallGraphWalker.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "ra" + +using namespace llvm; + +namespace opts { +extern cl::opt<unsigned> Verbosity; +extern cl::OptionCategory BoltOptCategory; + +cl::opt<bool> AssumeABI( + "assume-abi", + cl::desc("assume the ABI is never violated"), + cl::ZeroOrMore, + cl::init(false), + cl::cat(BoltOptCategory)); +} + +namespace llvm { +namespace bolt { + +RegAnalysis::RegAnalysis(BinaryContext &BC, + std::map<uint64_t, BinaryFunction> &BFs, + BinaryFunctionCallGraph &CG) + : BC(BC) { + CallGraphWalker CGWalker(BC, BFs, CG); + + CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool { + BitVector RegsKilled = getFunctionClobberList(Func); + bool Updated = RegsKilledMap.find(Func) == RegsKilledMap.end() || + RegsKilledMap[Func] != RegsKilled; + if (Updated) + RegsKilledMap[Func] = std::move(RegsKilled); + return Updated; + }); + + CGWalker.registerVisitor([&](BinaryFunction *Func) -> bool { + BitVector RegsGen = getFunctionUsedRegsList(Func); + bool Updated = RegsGenMap.find(Func) == RegsGenMap.end() || + RegsGenMap[Func] != RegsGen; + if (Updated) + RegsGenMap[Func] = std::move(RegsGen); + return Updated;
+ }); + + CGWalker.walk(); + + if (opts::Verbosity == 0) { +#ifndef NDEBUG + if (!DebugFlag || !isCurrentDebugType(DEBUG_TYPE)) + return; +#else + return; +#endif + } + + // This loop is for computing statistics only + for (auto &MapEntry : BFs) { + auto *Func = &MapEntry.second; + auto Iter = RegsKilledMap.find(Func); + assert(Iter != RegsKilledMap.end() && + "Failed to compute all clobbers list"); + if (Iter->second.all()) { + auto Count = Func->getExecutionCount(); + if (Count != BinaryFunction::COUNT_NO_PROFILE) + CountFunctionsAllClobber += Count; + ++NumFunctionsAllClobber; + } + DEBUG_WITH_TYPE("fa", + dbgs() << "Killed regs set for func: " << Func->getPrintName() << "\n"; + const BitVector &RegsKilled = Iter->second; + int RegIdx = RegsKilled.find_first(); + while (RegIdx != -1) { + dbgs() << "\tREG" << RegIdx; + RegIdx = RegsKilled.find_next(RegIdx); + }; + dbgs() << "\nUsed regs set for func: " << Func->getPrintName() << "\n"; + const BitVector &RegsUsed = RegsGenMap.find(Func)->second; + RegIdx = RegsUsed.find_first(); + while (RegIdx != -1) { + dbgs() << "\tREG" << RegIdx; + RegIdx = RegsUsed.find_next(RegIdx); + }; + dbgs() << "\n"; + ); + } +} + +void RegAnalysis::beConservative(BitVector &Result) const { + if (!opts::AssumeABI) { + Result.set(); + } else { + BitVector BV(BC.MRI->getNumRegs(), false); + BC.MIA->getCalleeSavedRegs(BV, *BC.MRI); + BV.flip(); + Result |= BV; + } +} + +bool RegAnalysis::isConservative(BitVector &Vec) const { + if (!opts::AssumeABI) { + return Vec.all(); + } else { + BitVector BV(BC.MRI->getNumRegs(), false); + BC.MIA->getCalleeSavedRegs(BV, *BC.MRI); + BV |= Vec; + return BV.all(); + } +} + +void RegAnalysis::getInstUsedRegsList(const MCInst &Inst, BitVector &RegSet, + bool GetClobbers) const { + if (!BC.MIA->isCall(Inst)) { + if (GetClobbers) + BC.MIA->getClobberedRegs(Inst, RegSet, *BC.MRI); + else + BC.MIA->getUsedRegs(Inst, RegSet, *BC.MRI); + return; + } + + const auto *TargetSymbol = BC.MIA->getTargetSymbol(Inst); + 
// If indirect call, we know nothing + if (TargetSymbol == nullptr) { + beConservative(RegSet); + return; + } + + const auto *Function = BC.getFunctionForSymbol(TargetSymbol); + if (Function == nullptr) { + // Call to a function without a BinaryFunction object. + // This should be a call to a PLT entry, and since it is a trampoline to + // a DSO, we can't really know the code in advance. + beConservative(RegSet); + return; + } + if (GetClobbers) { + auto BV = RegsKilledMap.find(Function); + if (BV != RegsKilledMap.end()) { + RegSet |= BV->second; + return; + } + // Ignore calls to function whose clobber list wasn't yet calculated. This + // instruction will be evaluated again once we have info for the callee. + return; + } + auto BV = RegsGenMap.find(Function); + if (BV != RegsGenMap.end()) { + RegSet |= BV->second; + return; + } +} + +void RegAnalysis::getInstClobberList(const MCInst &Inst, + BitVector &KillSet) const { + return getInstUsedRegsList(Inst, KillSet, /*GetClobbers*/ true); +} + +BitVector RegAnalysis::getFunctionUsedRegsList(const BinaryFunction *Func) { + BitVector UsedRegs = BitVector(BC.MRI->getNumRegs(), false); + + if (!Func->isSimple() || !Func->hasCFG()) { + beConservative(UsedRegs); + return UsedRegs; + } + + for (const auto &BB : *Func) { + for (const auto &Inst : BB) { + getInstUsedRegsList(Inst, UsedRegs, /*GetClobbers*/false); + if (UsedRegs.all()) + return UsedRegs; + } + } + + return UsedRegs; +} + +BitVector RegAnalysis::getFunctionClobberList(const BinaryFunction *Func) { + BitVector RegsKilled = BitVector(BC.MRI->getNumRegs(), false); + + if (!Func->isSimple() || !Func->hasCFG()) { + beConservative(RegsKilled); + return RegsKilled; + } + + for (const auto &BB : *Func) { + for (const auto &Inst : BB) { + getInstClobberList(Inst, RegsKilled); + if (RegsKilled.all()) + return RegsKilled; + } + } + + return RegsKilled; +} + +void RegAnalysis::printStats() { + outs() << "BOLT-INFO REG ANALYSIS: Number of functions conservatively " + 
"treated as clobbering all registers: " + << NumFunctionsAllClobber + << format(" (%.1lf%% dyn cov)\n", + (100.0 * CountFunctionsAllClobber / CountDenominator)); +} + +} +} diff --git a/bolt/Passes/RegAnalysis.h b/bolt/Passes/RegAnalysis.h new file mode 100644 index 000000000000..dd802bcfb5f3 --- /dev/null +++ b/bolt/Passes/RegAnalysis.h @@ -0,0 +1,82 @@ +//===--- Passes/RegAnalysis.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_REGANALYSIS_H +#define LLVM_TOOLS_LLVM_BOLT_PASSES_REGANALYSIS_H + +#include "BinaryContext.h" +#include "BinaryFunction.h" +#include "BinaryFunctionCallGraph.h" +#include "llvm/ADT/BitVector.h" +#include <map> + +namespace llvm { +namespace bolt { + +/// Determine the set of registers read or clobbered for each instruction +/// in a BinaryFunction. If the instruction is a call, this analysis relies on +/// a call graph traversal to accurately extract the set of registers touched +/// after the call returns. +class RegAnalysis { + BinaryContext &BC; + + /// Map functions to the set of registers they may overwrite starting at when +  /// it is called until it returns to the caller. + std::map<const BinaryFunction *, BitVector> RegsKilledMap; + + /// Similar concept as above but for registers that are read in that function. + std::map<const BinaryFunction *, BitVector> RegsGenMap; + + /// Analysis stats counters + uint64_t NumFunctionsAllClobber{0}; + uint64_t CountFunctionsAllClobber{0}; + uint64_t CountDenominator{0}; + + /// Helper function used to get the set of clobbered/used regs whenever + /// we know nothing about the function.
+ void beConservative(BitVector &Result) const; + + /// Compute the set of registers \p Func may read from during its execution. + BitVector getFunctionUsedRegsList(const BinaryFunction *Func); + + /// Compute the set of registers \p Func may write to during its execution, + /// starting at the point when it is called up until when it returns. Returns + /// a BitVector the size of the target number of registers, representing the + /// set of clobbered registers. + BitVector getFunctionClobberList(const BinaryFunction *Func); + +public: + RegAnalysis(BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs, + BinaryFunctionCallGraph &CG); + + /// Compute the set of registers \p Inst may read from, marking them in + /// \p RegSet. If GetClobbers is true, compute instead the set of registers + /// the instruction may write to. Use the callgraph to fill out this info for + /// calls. + void getInstUsedRegsList(const MCInst &Inst, BitVector &RegSet, + bool GetClobbers) const; + + /// Compute the set of registers \p Inst may write to, marking them in + /// \p KillSet. If this is a call, try to get the set of registers the call + /// target will write to. + void getInstClobberList(const MCInst &Inst, BitVector &KillSet) const; + + /// Return true iff Vec has a conservative estimation of used/clobbered regs, + /// expressing no specific knowledge of reg usage.
+ bool isConservative(BitVector &Vec) const; + + /// Print stats about the quality of our analysis + void printStats(); +}; + +} +} + +#endif diff --git a/bolt/Passes/ShrinkWrapping.cpp b/bolt/Passes/ShrinkWrapping.cpp index dcc5b5758c60..58570fb036b3 100644 --- a/bolt/Passes/ShrinkWrapping.cpp +++ b/bolt/Passes/ShrinkWrapping.cpp @@ -41,7 +41,7 @@ void CalleeSavedAnalysis::analyzeSaves() { DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); const MCInst *Prev = nullptr; for (auto &Inst : BB) { - if (auto FIE = FA.getFIEFor(BC, Inst)) { + if (auto FIE = FA.getFIEFor(Inst)) { if (!FIE->IsStore || !FIE->IsSimple || !FIE->IsStoreFromReg || FIE->StackOffset >= 0) { Prev = &Inst; @@ -86,7 +86,7 @@ void CalleeSavedAnalysis::analyzeRestores() { const MCInst *Prev = nullptr; for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { auto &Inst = *I; - if (auto FIE = FA.getFIEFor(BC, Inst)) { + if (auto FIE = FA.getFIEFor(Inst)) { if (!FIE->IsLoad || !FIE->IsSimple || !CalleeSaved[FIE->RegOrImm] || FIE->StackOffset >= 0) { Prev = &Inst; @@ -229,7 +229,7 @@ void StackLayoutModifier::classifyStackAccesses() { for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { auto &Inst = *I; checkFramePointerInitialization(Inst); - auto FIEX = FA.getFIEFor(BC, Inst); + auto FIEX = FA.getFIEFor(Inst); if (!FIEX) { Prev = &Inst; continue; @@ -346,7 +346,7 @@ bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { if (!IsSimple || !BC.MIA->isPush(*DeletedPush)) return false; - auto FIE = FA.getFIEFor(BC, *DeletedPush); + auto FIE = FA.getFIEFor(*DeletedPush); if (!FIE) return false; @@ -370,7 +370,7 @@ bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { } bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { - auto FIE = FA.getFIEFor(BC, *DeletedPush); + auto FIE = FA.getFIEFor(*DeletedPush); if (!FIE) return false; int64_t RegionAddr = FIE->StackOffset; @@ -414,7 +414,7 @@ bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, 
continue; } - auto FIE = FA.getFIEFor(BC, Inst); + auto FIE = FA.getFIEFor(Inst); assert(FIE); if (FIE->StackPtrReg == BC.MIA->getStackPointer() && Slot < RegionAddr) continue; @@ -499,7 +499,7 @@ bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { continue; } - auto FIE = FA.getFIEFor(BC, Inst); + auto FIE = FA.getFIEFor(Inst); assert(FIE); if (FIE->StackPtrReg == BC.MIA->getStackPointer() && Slot < RegionAddr) continue; diff --git a/bolt/Passes/StackAvailableExpressions.cpp b/bolt/Passes/StackAvailableExpressions.cpp index d0a5f5b1c12a..a2169d2992ce 100644 --- a/bolt/Passes/StackAvailableExpressions.cpp +++ b/bolt/Passes/StackAvailableExpressions.cpp @@ -17,10 +17,11 @@ namespace llvm { namespace bolt { -StackAvailableExpressions::StackAvailableExpressions(const FrameAnalysis &FA, +StackAvailableExpressions::StackAvailableExpressions(const RegAnalysis &RA, + const FrameAnalysis &FA, const BinaryContext &BC, BinaryFunction &BF) - : InstrsDataflowAnalysis(BC, BF), FA(FA) {} + : InstrsDataflowAnalysis(BC, BF), RA(RA), FA(FA) {} void StackAvailableExpressions::preflight() { DEBUG(dbgs() << "Starting StackAvailableExpressions on \"" @@ -31,7 +32,7 @@ void StackAvailableExpressions::preflight() { // program. 
for (auto &BB : Func) { for (auto &Inst : BB) { - auto FIE = FA.getFIEFor(BC, Inst); + auto FIE = FA.getFIEFor(Inst); if (!FIE) continue; if (FIE->IsStore == true && FIE->IsSimple == true) { @@ -80,8 +81,8 @@ bool isLoadRedundant(const FrameIndexEntry &LoadFIE, bool StackAvailableExpressions::doesXKillsY(const MCInst *X, const MCInst *Y) { // if both are stores, and both store to the same stack location, return // true - auto FIEX = FA.getFIEFor(BC, *X); - auto FIEY = FA.getFIEFor(BC, *Y); + auto FIEX = FA.getFIEFor(*X); + auto FIEY = FA.getFIEFor(*Y); if (FIEX && FIEY) { if (isLoadRedundant(*FIEX, *FIEY)) return false; @@ -93,14 +94,14 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst *X, const MCInst *Y) { // getClobberedRegs for X and Y. If they intersect, return true BitVector XClobbers = BitVector(BC.MRI->getNumRegs(), false); BitVector YClobbers = BitVector(BC.MRI->getNumRegs(), false); - FA.getInstClobberList(BC, *X, XClobbers); + RA.getInstClobberList(*X, XClobbers); // If Y is a store to stack, its clobber list is its source reg. This is // different than the rest because we want to check if the store source // reaches its corresponding load untouched. 
if (FIEY && FIEY->IsStore == true && FIEY->IsStoreFromReg) { YClobbers.set(FIEY->RegOrImm); } else { - FA.getInstClobberList(BC, *Y, YClobbers); + RA.getInstClobberList(*Y, YClobbers); } XClobbers &= YClobbers; return XClobbers.any(); @@ -121,7 +122,7 @@ BitVector StackAvailableExpressions::computeNext(const MCInst &Point, } } // Gen - if (auto FIE = FA.getFIEFor(BC, Point)) { + if (auto FIE = FA.getFIEFor(Point)) { if (FIE->IsStore == true && FIE->IsSimple == true) Next.set(ExprToIdx[&Point]); } diff --git a/bolt/Passes/StackAvailableExpressions.h b/bolt/Passes/StackAvailableExpressions.h index 6ec3234ff6ad..d96f49d3886b 100644 --- a/bolt/Passes/StackAvailableExpressions.h +++ b/bolt/Passes/StackAvailableExpressions.h @@ -13,6 +13,7 @@ #define LLVM_TOOLS_LLVM_BOLT_PASSES_STACKAVAILABLEEXPRESSIONS_H #include "DataflowAnalysis.h" +#include "RegAnalysis.h" #include "llvm/Support/Timer.h" namespace llvm { @@ -25,7 +26,7 @@ class StackAvailableExpressions friend class DataflowAnalysis; public: - StackAvailableExpressions(const FrameAnalysis &FA, + StackAvailableExpressions(const RegAnalysis &RA, const FrameAnalysis &FA, const BinaryContext &BC, BinaryFunction &BF); virtual ~StackAvailableExpressions() {} @@ -35,7 +36,7 @@ public: } protected: - /// Reference to the result of stack frame analysis + const RegAnalysis &RA; const FrameAnalysis &FA; void preflight(); diff --git a/bolt/Passes/StackReachingUses.cpp b/bolt/Passes/StackReachingUses.cpp index 68e76b1438ff..a7a91e92b06a 100644 --- a/bolt/Passes/StackReachingUses.cpp +++ b/bolt/Passes/StackReachingUses.cpp @@ -22,7 +22,7 @@ bool StackReachingUses::isStoreUsed(const FrameIndexEntry &StoreFIE, for (auto I = Candidates; I != expr_end(); ++I) { const MCInst *ReachingInst = *I; if (IncludeLocalAccesses) { - if (auto FIEY = FA.getFIEFor(BC, *ReachingInst)) { + if (auto FIEY = FA.getFIEFor(*ReachingInst)) { assert(FIEY->IsLoad == 1); if (StoreFIE.StackOffset + StoreFIE.Size > FIEY->StackOffset && StoreFIE.StackOffset < 
FIEY->StackOffset + FIEY->Size) { @@ -30,7 +30,7 @@ bool StackReachingUses::isStoreUsed(const FrameIndexEntry &StoreFIE, } } } - auto Args = FA.getArgAccessesFor(BC, *ReachingInst); + auto Args = FA.getArgAccessesFor(*ReachingInst); if (!Args) continue; if (Args->AssumeEverything) { @@ -55,14 +55,14 @@ void StackReachingUses::preflight() { // program. for (auto &BB : Func) { for (auto &Inst : BB) { - if (auto FIE = FA.getFIEFor(BC, Inst)) { + if (auto FIE = FA.getFIEFor(Inst)) { if (FIE->IsLoad == true) { Expressions.push_back(&Inst); ExprToIdx[&Inst] = NumInstrs++; continue; } } - auto AA = FA.getArgAccessesFor(BC, Inst); + auto AA = FA.getArgAccessesFor(Inst); if (AA && (!AA->Set.empty() || AA->AssumeEverything)) { Expressions.push_back(&Inst); ExprToIdx[&Inst] = NumInstrs++; @@ -74,8 +74,8 @@ void StackReachingUses::preflight() { bool StackReachingUses::doesXKillsY(const MCInst *X, const MCInst *Y) { // if X is a store to the same stack location and the bytes fetched is a // superset of those bytes affected by the load in Y, return true - auto FIEX = FA.getFIEFor(BC, *X); - auto FIEY = FA.getFIEFor(BC, *Y); + auto FIEX = FA.getFIEFor(*X); + auto FIEY = FA.getFIEFor(*Y); if (FIEX && FIEY) { if (FIEX->IsStore == true && FIEY->IsLoad == true && FIEX->StackOffset <= FIEY->StackOffset && @@ -98,11 +98,11 @@ BitVector StackReachingUses::computeNext(const MCInst &Point, } }; // Gen - if (auto FIE = FA.getFIEFor(BC, Point)) { + if (auto FIE = FA.getFIEFor(Point)) { if (FIE->IsLoad == true) Next.set(ExprToIdx[&Point]); } - auto AA = FA.getArgAccessesFor(BC, Point); + auto AA = FA.getArgAccessesFor(Point); if (AA && (!AA->Set.empty() || AA->AssumeEverything)) Next.set(ExprToIdx[&Point]); return Next;