forked from OSchip/llvm-project
Introduce pass to reduce jump tables footprint
Summary: Add a pass to identify indirect jumps to jump tables and reduce their entry size from 8 to 4 bytes. For PIC jump tables, it will convert the PIC code to non-PIC (since BOLT only processes static code, it makes no sense to use expensive PIC-style jumps in static code). Add corresponding improvements to the register scavenging pass and add MCInst matcher machinery. (cherry picked from FBD6421582)
This commit is contained in:
parent
39a8c36697
commit
21eb2139ee
|
@ -3826,7 +3826,7 @@ uint64_t BinaryFunction::JumpTable::emit(MCStreamer *Streamer,
|
||||||
LastLabel = LI->second;
|
LastLabel = LI->second;
|
||||||
}
|
}
|
||||||
if (Type == JTT_NORMAL) {
|
if (Type == JTT_NORMAL) {
|
||||||
Streamer->EmitSymbolValue(Entry, EntrySize);
|
Streamer->EmitSymbolValue(Entry, OutputEntrySize);
|
||||||
} else { // JTT_PIC
|
} else { // JTT_PIC
|
||||||
auto JT = MCSymbolRefExpr::create(LastLabel, Streamer->getContext());
|
auto JT = MCSymbolRefExpr::create(LastLabel, Streamer->getContext());
|
||||||
auto E = MCSymbolRefExpr::create(Entry, Streamer->getContext());
|
auto E = MCSymbolRefExpr::create(Entry, Streamer->getContext());
|
||||||
|
|
|
@ -538,6 +538,9 @@ public:
|
||||||
/// Size of the entry used for storage.
|
/// Size of the entry used for storage.
|
||||||
std::size_t EntrySize;
|
std::size_t EntrySize;
|
||||||
|
|
||||||
|
/// Size of the entry we will write (we may use a more compact layout)
|
||||||
|
std::size_t OutputEntrySize;
|
||||||
|
|
||||||
/// The type of this jump table.
|
/// The type of this jump table.
|
||||||
JumpTableType Type;
|
JumpTableType Type;
|
||||||
|
|
||||||
|
@ -567,14 +570,11 @@ public:
|
||||||
std::pair<size_t, size_t> getEntriesForAddress(const uint64_t Addr) const;
|
std::pair<size_t, size_t> getEntriesForAddress(const uint64_t Addr) const;
|
||||||
|
|
||||||
/// Constructor.
|
/// Constructor.
|
||||||
JumpTable(uint64_t Address,
|
JumpTable(uint64_t Address, std::size_t EntrySize, JumpTableType Type,
|
||||||
std::size_t EntrySize,
|
|
||||||
JumpTableType Type,
|
|
||||||
decltype(OffsetEntries) &&OffsetEntries,
|
decltype(OffsetEntries) &&OffsetEntries,
|
||||||
decltype(Labels) &&Labels)
|
decltype(Labels) &&Labels)
|
||||||
: Address(Address), EntrySize(EntrySize), Type(Type),
|
: Address(Address), EntrySize(EntrySize), OutputEntrySize(EntrySize),
|
||||||
OffsetEntries(OffsetEntries), Labels(Labels)
|
Type(Type), OffsetEntries(OffsetEntries), Labels(Labels) {}
|
||||||
{}
|
|
||||||
|
|
||||||
/// Dynamic number of times each entry in the table was referenced.
|
/// Dynamic number of times each entry in the table was referenced.
|
||||||
/// Identical entries will have a shared count (identical for every
|
/// Identical entries will have a shared count (identical for every
|
||||||
|
@ -1275,6 +1275,11 @@ public:
|
||||||
return getJumpTableContainingAddress(Address);
|
return getJumpTableContainingAddress(Address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Look up the jump table associated with \p Inst: the address that
/// BC.MIA->getJumpTable() reports for the instruction is forwarded to
/// getJumpTableContainingAddress(), and its result is returned unchanged.
JumpTable *getJumpTable(const MCInst &Inst) {
  return getJumpTableContainingAddress(BC.MIA->getJumpTable(Inst));
}
|
||||||
|
|
||||||
const MCSymbol *getPersonalityFunction() const {
|
const MCSymbol *getPersonalityFunction() const {
|
||||||
return PersonalityFunction;
|
return PersonalityFunction;
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "Passes/IndirectCallPromotion.h"
|
#include "Passes/IndirectCallPromotion.h"
|
||||||
#include "Passes/Inliner.h"
|
#include "Passes/Inliner.h"
|
||||||
#include "Passes/LongJmp.h"
|
#include "Passes/LongJmp.h"
|
||||||
|
#include "Passes/JTFootprintReduction.h"
|
||||||
#include "Passes/PLTCall.h"
|
#include "Passes/PLTCall.h"
|
||||||
#include "Passes/ReorderFunctions.h"
|
#include "Passes/ReorderFunctions.h"
|
||||||
#include "Passes/StokeInfo.h"
|
#include "Passes/StokeInfo.h"
|
||||||
|
@ -62,6 +63,19 @@ InlineSmallFunctions("inline-small-functions",
|
||||||
cl::ZeroOrMore,
|
cl::ZeroOrMore,
|
||||||
cl::cat(BoltOptCategory));
|
cl::cat(BoltOptCategory));
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
JTFootprintReductionFlag("jt-footprint-reduction",
|
||||||
|
cl::desc("make jump tables size smaller at the cost of using more "
|
||||||
|
"instructions at jump sites"),
|
||||||
|
cl::ZeroOrMore,
|
||||||
|
cl::cat(BoltOptCategory));
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
PrintJTFootprintReduction("print-after-jt-footprint-reduction",
|
||||||
|
cl::desc("print function after jt-footprint-reduction pass"),
|
||||||
|
cl::ZeroOrMore,
|
||||||
|
cl::cat(BoltOptCategory));
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
NeverPrint("never-print",
|
NeverPrint("never-print",
|
||||||
cl::desc("never print"),
|
cl::desc("never print"),
|
||||||
|
@ -328,6 +342,10 @@ void BinaryFunctionPassManager::runAllPasses(
|
||||||
|
|
||||||
Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes));
|
Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes));
|
||||||
|
|
||||||
|
Manager.registerPass(
|
||||||
|
llvm::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
|
||||||
|
opts::JTFootprintReductionFlag);
|
||||||
|
|
||||||
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(PrintInline),
|
Manager.registerPass(llvm::make_unique<InlineSmallFunctions>(PrintInline),
|
||||||
opts::InlineSmallFunctions);
|
opts::InlineSmallFunctions);
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses
|
||||||
HFSortPlus.cpp
|
HFSortPlus.cpp
|
||||||
IndirectCallPromotion.cpp
|
IndirectCallPromotion.cpp
|
||||||
Inliner.cpp
|
Inliner.cpp
|
||||||
|
JTFootprintReduction.cpp
|
||||||
LivenessAnalysis.cpp
|
LivenessAnalysis.cpp
|
||||||
LongJmp.cpp
|
LongJmp.cpp
|
||||||
MCF.cpp
|
MCF.cpp
|
||||||
|
|
|
@ -37,6 +37,19 @@ void doForAllSuccs(const BinaryBasicBlock &BB,
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegStatePrinter::print(raw_ostream &OS, const BitVector &State) const {
|
void RegStatePrinter::print(raw_ostream &OS, const BitVector &State) const {
|
||||||
|
if (State.all()) {
|
||||||
|
OS << "(all)";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (State.count() > (State.size() >> 1)) {
|
||||||
|
OS << "all, except: ";
|
||||||
|
auto BV = State;
|
||||||
|
BV.flip();
|
||||||
|
for (auto I = BV.find_first(); I != -1; I = BV.find_next(I)) {
|
||||||
|
OS << BC.MRI->getName(I) << " ";
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
for (auto I = State.find_first(); I != -1; I = State.find_next(I)) {
|
for (auto I = State.find_first(); I != -1; I = State.find_next(I)) {
|
||||||
OS << BC.MRI->getName(I) << " ";
|
OS << BC.MRI->getName(I) << " ";
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,276 @@
|
||||||
|
//===--- JTFootprintReduction.cpp -----------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "JTFootprintReduction.h"
|
||||||
|
#include "llvm/Support/Options.h"
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "JT"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
using namespace bolt;
|
||||||
|
|
||||||
|
namespace opts {
|
||||||
|
|
||||||
|
extern cl::OptionCategory BoltOptCategory;
|
||||||
|
|
||||||
|
extern cl::opt<unsigned> Verbosity;
|
||||||
|
extern cl::opt<bool> Relocs;
|
||||||
|
extern bool shouldProcess(const bolt::BinaryFunction &Function);
|
||||||
|
|
||||||
|
extern cl::opt<JumpTableSupportLevel> JumpTables;
|
||||||
|
} // namespace opts
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
namespace bolt {
|
||||||
|
|
||||||
|
/// Inspect every instruction of \p Function that refers to a jump table and
/// decide whether that table may be rewritten. A table is put into
/// BlacklistedJTs as soon as one of its jump sites either has no register
/// available after it (non-PIC form) or matches none of the supported
/// patterns. The pass-wide statistics counters are updated along the way.
void JTFootprintReduction::checkOpportunities(BinaryContext &BC,
                                              BinaryFunction &Function,
                                              DataflowInfoManager &Info) {
  // Execution-count score accumulated per jump table seen in this function.
  std::map<BinaryFunction::JumpTable *, uint64_t> JTScores;

  for (auto &BB : Function) {
    for (auto &Inst : BB) {
      auto *JT = Function.getJumpTable(Inst);
      if (!JT)
        continue;

      JTScores[JT] += BB.getKnownExecutionCount();
      ++IndJmps;

      if (BlacklistedJTs.count(JT))
        continue;

      // Instruction window handed to the matchers: from the start of the
      // block up to and including Inst.
      auto Window = [&] {
        return MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1);
      };
      // Blacklist the table and bump the counter naming the reason.
      auto Deny = [&](uint64_t &ReasonCounter) {
        BlacklistedJTs.insert(JT);
        ++IndJmpsDenied;
        ++ReasonCounter;
      };

      uint64_t Scale;
      // First attempt: the standard indirect jump form with 8-byte entries.
      auto StdMatcher = BC.MIA->matchIndJmp(
          BC.MIA->matchAnyOperand(), BC.MIA->matchImm(Scale),
          BC.MIA->matchReg(), BC.MIA->matchAnyOperand());
      const bool IsStdJump =
          StdMatcher->match(*BC.MRI, *BC.MIA, Window(), -1) && Scale == 8;
      if (IsStdJump) {
        // This form can only be rewritten if a register can be scavenged
        // after the jump.
        if (!Info.getLivenessAnalysis().scavengeRegAfter(&Inst))
          Deny(NumJTsNoReg);
        continue;
      }

      // Second attempt: a PIC jump table indirect jump, e.g.
      //    addq    %rdx, %rsi
      //    addq    %rdx, %rdi
      //    leaq    DATAat0x402450(%rip), %r11
      //    movslq  (%r11,%rdx,4), %rcx
      //    addq    %r11, %rcx
      //    jmpq    *%rcx # JUMPTABLE @0x402450
      MCPhysReg BaseReg1;
      MCPhysReg BaseReg2;
      uint64_t Offset;
      auto PICJumpMatcher = BC.MIA->matchIndJmp(BC.MIA->matchAdd(
          BC.MIA->matchReg(BaseReg1),
          BC.MIA->matchLoad(BC.MIA->matchReg(BaseReg2), BC.MIA->matchImm(Scale),
                            BC.MIA->matchReg(), BC.MIA->matchImm(Offset))));
      auto PICBaseMatcher = BC.MIA->matchIndJmp(
          BC.MIA->matchAdd(BC.MIA->matchLoadAddr(BC.MIA->matchSymbol()),
                           BC.MIA->matchAnyOperand()));

      // The captured values are only inspected once the first match has
      // succeeded (short-circuit evaluation, left to right).
      const bool IsSupportedPIC =
          PICJumpMatcher->match(*BC.MRI, *BC.MIA, Window(), -1) &&
          Scale == 4 && BaseReg1 == BaseReg2 && Offset == 0 &&
          PICBaseMatcher->match(*BC.MRI, *BC.MIA, Window(), -1);
      if (!IsSupportedPIC)
        Deny(NumJTsBadMatch);
    }
  }

  // Statistics only
  for (const auto &Entry : JTScores) {
    const uint64_t Score = Entry.second;
    TotalJTScore += Score;
    if (BlacklistedJTs.count(Entry.first))
      continue;
    OptimizedScore += Score;
    // Going from 8-byte to 4-byte entries halves the table.
    if (Entry.first->EntrySize == 8)
      BytesSaved += Entry.first->getSize() >> 1;
  }
  TotalJTs += JTScores.size();
  TotalJTsDenied += BlacklistedJTs.size();
}
|
||||||
|
|
||||||
|
/// Try to rewrite the non-PIC indirect jump \p Inst in \p BB so that it reads
/// 4-byte jump table entries. Returns false, changing nothing, when the
/// instruction window does not match the standard indirect jump form. On
/// success the matched instructions are annotated "DeleteMe", \p JumpTable is
/// marked for output with 4-byte entries, \p Inst is replaced by the new
/// fragment, and true is returned.
bool JTFootprintReduction::tryOptimizeNonPIC(
    BinaryContext &BC, BinaryBasicBlock &BB, MCInst &Inst, uint64_t JTAddr,
    BinaryFunction::JumpTable *JumpTable, DataflowInfoManager &Info) {
  // Operands of the memory reference captured by the matcher.
  MCOperand Base;
  uint64_t Scale;
  MCPhysReg Index;
  MCOperand Offset;

  auto Matcher = BC.MIA->matchIndJmp(
      BC.MIA->matchAnyOperand(Base), BC.MIA->matchImm(Scale),
      BC.MIA->matchReg(Index), BC.MIA->matchAnyOperand(Offset));
  const bool Matched =
      Matcher->match(*BC.MRI, *BC.MIA,
                     MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1), -1);
  if (!Matched)
    return false;

  assert(Scale == 8 && "Wrong scale");

  // The rewritten jump indexes 4-byte entries.
  Scale = 4;
  Matcher->annotate(*BC.MIA, *BC.Ctx.get(), "DeleteMe");

  // The replacement sequence needs one extra register.
  const MCPhysReg Reg = Info.getLivenessAnalysis().scavengeRegAfter(&Inst);
  assert(Reg != 0 && "Register scavenger failed!");

  SmallVector<MCInst, 4> NewFrag;
  BC.MIA->createIJmp32Frag(NewFrag, Base, MCOperand::createImm(Scale),
                           MCOperand::createReg(Index), Offset,
                           MCOperand::createReg(Reg));
  // Attach the jump table information to the last instruction of the
  // fragment.
  BC.MIA->setJumpTable(BC.Ctx.get(), NewFrag.back(), JTAddr, Index);

  JumpTable->OutputEntrySize = 4;

  BB.replaceInstruction(&Inst, NewFrag.begin(), NewFrag.end());
  return true;
}
|
||||||
|
|
||||||
|
/// Try to rewrite the PIC-style jump table dispatch ending at \p Inst in
/// \p BB into the 32-bit non-PIC fragment. Returns false, changing nothing,
/// when the instruction window does not match the PIC pattern
/// (indirect jump on add(load-address, load(base, scale, index, any))).
/// On success the matched instructions are annotated "DeleteMe",
/// \p JumpTable is switched to JTT_NORMAL with 4-byte output entries,
/// \p Inst is replaced by the new fragment, and true is returned.
/// \p Info is not used on this path.
bool JTFootprintReduction::tryOptimizePIC(
    BinaryContext &BC, BinaryBasicBlock &BB, MCInst &Inst, uint64_t JTAddr,
    BinaryFunction::JumpTable *JumpTable, DataflowInfoManager &Info) {
  // Values captured by the matcher on a successful match.
  MCPhysReg BaseReg;
  uint64_t Scale;
  MCPhysReg Index;
  MCOperand JumpTableRef;
  auto PICIndJmpMatcher = BC.MIA->matchIndJmp(BC.MIA->matchAdd(
      BC.MIA->matchLoadAddr(BC.MIA->matchAnyOperand(JumpTableRef)),
      BC.MIA->matchLoad(BC.MIA->matchReg(BaseReg), BC.MIA->matchImm(Scale),
                        BC.MIA->matchReg(Index), BC.MIA->matchAnyOperand())));
  if (!PICIndJmpMatcher->match(*BC.MRI, *BC.MIA,
                               MutableArrayRef<MCInst>(&*BB.begin(), &Inst + 1),
                               -1)) {
    return false;
  }

  assert(Scale == 4 && "Wrong scale");

  PICIndJmpMatcher->annotate(*BC.MIA, *BC.Ctx.get(), "DeleteMe");

  // The base register of the matched load is passed where the non-PIC path
  // passes its scavenged register, so no scavenging is done here.
  auto RegOp = MCOperand::createReg(BaseReg);
  SmallVector<MCInst, 4> NewFrag;

  // No base register (register 0); the captured jump table reference is
  // passed as the offset operand of the new fragment.
  BC.MIA->createIJmp32Frag(NewFrag, MCOperand::createReg(0),
                           MCOperand::createImm(Scale),
                           MCOperand::createReg(Index), JumpTableRef, RegOp);
  BC.MIA->setJumpTable(BC.Ctx.get(), NewFrag.back(), JTAddr, Index);

  JumpTable->OutputEntrySize = 4;
  // DePICify
  JumpTable->Type = BinaryFunction::JumpTable::JTT_NORMAL;

  BB.replaceInstruction(&Inst, NewFrag.begin(), NewFrag.end());
  return true;
}
|
||||||
|
|
||||||
|
/// Rewrite every eligible jump table dispatch in \p Function. Only the last
/// non-pseudo instruction of each block is considered; blocks whose jump
/// table is in BlacklistedJTs are left alone. If anything was rewritten, the
/// function is recorded in Modified and all instructions annotated
/// "DeleteMe" are erased afterwards.
void JTFootprintReduction::optimizeFunction(BinaryContext &BC,
                                            BinaryFunction &Function,
                                            DataflowInfoManager &Info) {
  for (auto &BB : Function) {
    if (BB.getNumNonPseudos() == 0)
      continue;

    MCInst &LastInst = *BB.getLastNonPseudo();
    const uint64_t JTAddr = BC.MIA->getJumpTable(LastInst);
    if (JTAddr == 0)
      continue;

    auto *JT = Function.getJumpTable(LastInst);
    if (BlacklistedJTs.count(JT))
      continue;

    // The non-PIC rewrite is attempted first; the PIC one only if it
    // declines. One of the two must succeed for a non-blacklisted table.
    const bool Rewritten =
        tryOptimizeNonPIC(BC, BB, LastInst, JTAddr, JT, Info) ||
        tryOptimizePIC(BC, BB, LastInst, JTAddr, JT, Info);
    if (!Rewritten)
      llvm_unreachable("Should either optimize PIC or NonPIC successfuly");

    Modified.insert(&Function);
  }

  if (Modified.count(&Function) == 0)
    return;

  // Drop the instructions that the matchers flagged as superseded.
  // NOTE(review): instructions are erased while walking the block in
  // reverse; this relies on eraseInstruction() keeping the reverse iterator
  // usable -- confirm against BinaryBasicBlock.
  for (auto &BB : Function) {
    for (auto It = BB.rbegin(), End = BB.rend(); It != End; ++It) {
      if (!BC.MIA->hasAnnotation(*It, "DeleteMe"))
        continue;
      BB.eraseInstruction(&*It);
    }
  }
}
|
||||||
|
|
||||||
|
/// Pass entry point. Does nothing when jump table support is JTS_BASIC and
/// relocations mode is on. Otherwise every simple, selected function with a
/// non-zero known execution count is analyzed and rewritten, and a summary of
/// the collected statistics is printed to outs(). \p LargeFunctions is not
/// used.
void JTFootprintReduction::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &LargeFunctions) {
  if (opts::JumpTables == JTS_BASIC && opts::Relocs)
    return;

  BinaryFunctionCallGraph CG(buildCallGraph(BC, BFs));
  RegAnalysis RA(BC, BFs, CG);
  for (auto &BFIt : BFs) {
    auto &Function = BFIt.second;

    const bool Eligible = Function.isSimple() &&
                          opts::shouldProcess(Function) &&
                          Function.getKnownExecutionCount() != 0;
    if (!Eligible)
      continue;

    DataflowInfoManager Info(BC, Function, &RA, nullptr);
    // The blacklist is per function; the statistics counters are not.
    BlacklistedJTs.clear();
    checkOpportunities(BC, Function, Info);
    optimizeFunction(BC, Function, Info);
  }

  auto &OS = outs();
  if (TotalJTs == TotalJTsDenied) {
    OS << "BOLT-INFO: JT Footprint reduction: no changes were made.\n";
    return;
  }

  OS << "BOLT-INFO: JT Footprint reduction stats (simple funcs only):\n";
  if (OptimizedScore) {
    OS << format("\t %.2lf%%", (OptimizedScore * 100.0 / TotalJTScore))
       << " of dynamic JT entries were reduced.\n";
  }
  OS << "\t " << TotalJTs - TotalJTsDenied << " of " << TotalJTs
     << " jump tables affected.\n"
     << "\t " << IndJmps - IndJmpsDenied << " of " << IndJmps
     << " indirect jumps to JTs affected.\n"
     << "\t " << NumJTsBadMatch
     << " JTs discarded due to unsupported jump pattern.\n"
     << "\t " << NumJTsNoReg
     << " JTs discarded due to register unavailability.\n"
     << "\t " << BytesSaved
     << " bytes saved.\n";
}
|
||||||
|
|
||||||
|
} // namespace bolt
|
||||||
|
} // namespace llvm
|
|
@ -0,0 +1,85 @@
|
||||||
|
//===--- JTFootprintReduction.h -------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Jump table footprint reduction pass
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_JT_FOOTPRINT_REDUCTION_H
|
||||||
|
#define LLVM_TOOLS_LLVM_BOLT_PASSES_JT_FOOTPRINT_REDUCTION_H
|
||||||
|
|
||||||
|
#include "BinaryPasses.h"
|
||||||
|
#include "DataflowInfoManager.h"
|
||||||
|
#include "DataReader.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
namespace bolt {
|
||||||
|
|
||||||
|
/// This pass identifies indirect jumps to jump tables and reduces their entry
|
||||||
|
/// size from 8 to 4 bytes. For PIC jump tables, it will remove the PIC code
|
||||||
|
/// (since BOLT only processes static code, it makes no sense to use expensive
|
||||||
|
/// PIC-style jumps in static code).
|
||||||
|
class JTFootprintReduction : public BinaryFunctionPass {
|
||||||
|
uint64_t TotalJTScore{0};
|
||||||
|
uint64_t TotalJTs{0};
|
||||||
|
uint64_t TotalJTsDenied{0};
|
||||||
|
uint64_t OptimizedScore{0};
|
||||||
|
uint64_t IndJmps{0};
|
||||||
|
uint64_t IndJmpsDenied{0};
|
||||||
|
uint64_t NumJTsBadMatch{0};
|
||||||
|
uint64_t NumJTsNoReg{0};
|
||||||
|
uint64_t BytesSaved{0};
|
||||||
|
DenseSet<BinaryFunction::JumpTable *> BlacklistedJTs;
|
||||||
|
DenseSet<const BinaryFunction *> Modified;
|
||||||
|
|
||||||
|
/// Check if \p Function presents jump tables where all jump locations can
|
||||||
|
/// be safely changed to use a different code sequence. If this is true, we
|
||||||
|
/// will be able to emit the whole table with a smaller entry size.
|
||||||
|
void checkOpportunities(BinaryContext &BC, BinaryFunction &Function,
|
||||||
|
DataflowInfoManager &Info);
|
||||||
|
|
||||||
|
/// The Non-PIC jump table optimization consists of reducing the jump table
|
||||||
|
/// entry size from 8 to 4 bytes. For that, we need to change the jump code
|
||||||
|
/// sequence from a single jmp * instruction to a pair of load32zext-jmp
|
||||||
|
/// instructions that depend on the availability of an extra register.
|
||||||
|
/// This saves dcache/dTLB at the expense of icache.
|
||||||
|
bool tryOptimizeNonPIC(BinaryContext &BC, BinaryBasicBlock &BB, MCInst &Inst,
|
||||||
|
uint64_t JTAddr, BinaryFunction::JumpTable *JumpTable,
|
||||||
|
DataflowInfoManager &Info);
|
||||||
|
|
||||||
|
/// The PIC jump table optimization consists of "de-pic-ifying" it, since the
|
||||||
|
/// PIC jump sequence is larger than its non-PIC counterpart, saving icache.
|
||||||
|
bool tryOptimizePIC(BinaryContext &BC, BinaryBasicBlock &BB, MCInst &Inst,
|
||||||
|
uint64_t JTAddr, BinaryFunction::JumpTable *JumpTable,
|
||||||
|
DataflowInfoManager &Info);
|
||||||
|
|
||||||
|
/// Run a pass for \p Function
|
||||||
|
void optimizeFunction(BinaryContext &BC, BinaryFunction &Function,
|
||||||
|
DataflowInfoManager &Info);
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit JTFootprintReduction(const cl::opt<bool> &PrintPass)
|
||||||
|
: BinaryFunctionPass(PrintPass) { }
|
||||||
|
|
||||||
|
/// BinaryPass interface functions
|
||||||
|
const char *getName() const override {
|
||||||
|
return "jt-footprint-reduction";
|
||||||
|
}
|
||||||
|
bool shouldPrint(const BinaryFunction &BF) const override {
|
||||||
|
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
|
||||||
|
}
|
||||||
|
void runOnFunctions(BinaryContext &BC,
|
||||||
|
std::map<uint64_t, BinaryFunction> &BFs,
|
||||||
|
std::set<uint64_t> &LargeFunctions) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace bolt
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
|
#endif
|
|
@ -60,8 +60,13 @@ public:
|
||||||
BitVector BV = *this->getStateAt(P);
|
BitVector BV = *this->getStateAt(P);
|
||||||
BV.flip();
|
BV.flip();
|
||||||
BitVector GPRegs(NumRegs, false);
|
BitVector GPRegs(NumRegs, false);
|
||||||
this->BC.MIA->getGPRegs(GPRegs);
|
this->BC.MIA->getGPRegs(GPRegs, /*IncludeAlias=*/false);
|
||||||
|
// Ignore the register used for frame pointer even if it is not alive (it
|
||||||
|
// may be used by CFI which is not represented in our dataflow).
|
||||||
|
auto FP = BC.MIA->getAliases(BC.MIA->getFramePointer());
|
||||||
|
FP.flip();
|
||||||
BV &= GPRegs;
|
BV &= GPRegs;
|
||||||
|
BV &= FP;
|
||||||
int Reg = BV.find_first();
|
int Reg = BV.find_first();
|
||||||
return Reg != -1 ? Reg : 0;
|
return Reg != -1 ? Reg : 0;
|
||||||
}
|
}
|
||||||
|
@ -74,6 +79,19 @@ protected:
|
||||||
void preflight() {}
|
void preflight() {}
|
||||||
|
|
||||||
BitVector getStartingStateAtBB(const BinaryBasicBlock &BB) {
|
BitVector getStartingStateAtBB(const BinaryBasicBlock &BB) {
|
||||||
|
// Blocks with no successors start with default live out (registers used as return
|
||||||
|
// values).
|
||||||
|
if (BB.succ_size() == 0) {
|
||||||
|
BitVector State(NumRegs, false);
|
||||||
|
if (opts::AssumeABI) {
|
||||||
|
BC.MIA->getDefaultLiveOut(State);
|
||||||
|
BC.MIA->getCalleeSavedRegs(State);
|
||||||
|
} else {
|
||||||
|
State.set();
|
||||||
|
State.reset(BC.MIA->getFlagsReg());
|
||||||
|
}
|
||||||
|
return State;
|
||||||
|
}
|
||||||
return BitVector(NumRegs, false);
|
return BitVector(NumRegs, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,7 +118,15 @@ protected:
|
||||||
// because we don't really know what's going on.
|
// because we don't really know what's going on.
|
||||||
if (RA.isConservative(Written)) {
|
if (RA.isConservative(Written)) {
|
||||||
Written.reset();
|
Written.reset();
|
||||||
BC.MIA->getCalleeSavedRegs(Written);
|
BC.MIA->getDefaultLiveOut(Written);
|
||||||
|
// If ABI is respected, everything except CSRs should be dead after a
|
||||||
|
// call
|
||||||
|
if (opts::AssumeABI) {
|
||||||
|
auto CSR = BitVector(NumRegs, false);
|
||||||
|
BC.MIA->getCalleeSavedRegs(CSR);
|
||||||
|
CSR.flip();
|
||||||
|
Written |= CSR;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Written.flip();
|
Written.flip();
|
||||||
|
@ -108,7 +134,26 @@ protected:
|
||||||
// Gen
|
// Gen
|
||||||
if (!this->BC.MIA->isCFI(Point)) {
|
if (!this->BC.MIA->isCFI(Point)) {
|
||||||
auto Used = BitVector(NumRegs, false);
|
auto Used = BitVector(NumRegs, false);
|
||||||
RA.getInstUsedRegsList(Point, Used, /*GetClobbers*/false);
|
if (IsCall) {
|
||||||
|
RA.getInstUsedRegsList(Point, Used, /*GetClobbers*/true);
|
||||||
|
if (RA.isConservative(Used)) {
|
||||||
|
Used = BC.MIA->getRegsUsedAsParams();
|
||||||
|
BC.MIA->getDefaultLiveOut(Used);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const auto InstInfo = BC.MII->get(Point.getOpcode());
|
||||||
|
for (unsigned I = 0, E = Point.getNumOperands(); I != E; ++I) {
|
||||||
|
if (!Point.getOperand(I).isReg() || I < InstInfo.getNumDefs())
|
||||||
|
continue;
|
||||||
|
Used |= BC.MIA->getAliases(Point.getOperand(I).getReg(),
|
||||||
|
/*OnlySmaller=*/false);
|
||||||
|
}
|
||||||
|
for (auto
|
||||||
|
I = InstInfo.getImplicitUses(),
|
||||||
|
E = InstInfo.getImplicitUses() + InstInfo.getNumImplicitUses();
|
||||||
|
I != E; ++I) {
|
||||||
|
Used |= BC.MIA->getAliases(*I, false);
|
||||||
|
}
|
||||||
if (IsCall &&
|
if (IsCall &&
|
||||||
(!BC.MIA->isTailCall(Point) || !BC.MIA->isConditionalBranch(Point))) {
|
(!BC.MIA->isTailCall(Point) || !BC.MIA->isConditionalBranch(Point))) {
|
||||||
// Never gen FLAGS from a non-conditional call... this is overly
|
// Never gen FLAGS from a non-conditional call... this is overly
|
||||||
|
|
Loading…
Reference in New Issue