forked from OSchip/llvm-project
[BOLT] Move disassemble optimizations to optimization passes
Summary: The patch moves shortenInstructions and nop removal into separate binary passes. As a result, when the llvm-bolt optimization stage begins, the instructions of the binary functions are exactly the same as they were in the binary. This is needed for golang support in llvm-bolt. Some of the tests had to be changed, since basic-block alignment nops might create unreachable BBs in the original functions. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei (cherry picked from FBD32896517)
This commit is contained in:
parent
46e93fb427
commit
08f56926c2
|
@ -691,6 +691,11 @@ public:
|
|||
return Instructions.erase(II);
|
||||
}
|
||||
|
||||
/// Erase non-pseudo instruction at a given \p Index.
/// \p Index is applied as an offset from Instructions.begin(). No range check
/// is visible in this function, so presumably the caller must supply a valid
/// position.
|
||||
void eraseInstructionAtIndex(unsigned Index) {
|
||||
// Turn the index into an iterator and defer to eraseInstruction().
eraseInstruction(Instructions.begin() + Index);
|
||||
}
|
||||
|
||||
/// Erase instructions in the specified range.
|
||||
template <typename ItrType>
|
||||
void eraseInstructions(ItrType Begin, ItrType End) {
|
||||
|
|
|
@ -1401,6 +1401,8 @@ public:
|
|||
layout_front()->isCold() != layout_back()->isCold();
|
||||
}
|
||||
|
||||
/// Return the value of the PreserveNops flag of this function.
bool shouldPreserveNops() const { return PreserveNops; }
|
||||
|
||||
/// Return true if the function has exception handling tables.
|
||||
bool hasEHRanges() const { return HasEHRanges; }
|
||||
|
||||
|
|
|
@ -281,17 +281,25 @@ public:
|
|||
void runOnFunctions(BinaryContext &BC) override;
|
||||
};
|
||||
|
||||
/// Convert instructions to the form with the minimum operand width.
/// Implemented as a standalone BinaryFunctionPass registered under the name
/// "shorten-instructions".
|
||||
class ShortenInstructions : public BinaryFunctionPass {
|
||||
/// Process a single \p Function and return a count of type uint64_t.
/// NOTE(review): presumably the number of instructions that were shortened;
/// confirm against the definition.
uint64_t shortenInstructions(BinaryFunction &Function);
|
||||
|
||||
public:
|
||||
/// \p PrintPass is forwarded unchanged to the BinaryFunctionPass base.
explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
|
||||
: BinaryFunctionPass(PrintPass) {}
|
||||
|
||||
/// Name under which this pass is reported.
const char *getName() const override { return "shorten-instructions"; }
|
||||
|
||||
/// Pass entry point.
void runOnFunctions(BinaryContext &BC) override;
|
||||
};
|
||||
|
||||
/// Perform simple peephole optimizations.
|
||||
class Peepholes : public BinaryFunctionPass {
|
||||
uint64_t NumShortened{0};
|
||||
uint64_t NumDoubleJumps{0};
|
||||
uint64_t TailCallTraps{0};
|
||||
uint64_t NumUselessCondBranches{0};
|
||||
|
||||
/// Attempt to use the minimum operand width for arithmetic, branch and
|
||||
/// move instructions.
|
||||
uint64_t shortenInstructions(BinaryContext &BC, BinaryFunction &Function);
|
||||
|
||||
/// Add trap instructions immediately after indirect tail calls to prevent
|
||||
/// the processor from decoding instructions immediate following the
|
||||
/// tailcall.
|
||||
|
@ -432,6 +440,20 @@ public:
|
|||
void runOnFunctions(BinaryContext &BC) override;
|
||||
};
|
||||
|
||||
/// Pass to remove nops in code.
/// Implemented as a standalone BinaryFunctionPass registered under the name
/// "remove-nops".
|
||||
class RemoveNops : public BinaryFunctionPass {
|
||||
/// Per-function worker; private, so it is only reachable from within the
/// pass itself.
void runOnFunction(BinaryFunction &Function);
|
||||
|
||||
public:
|
||||
/// \p PrintPass is forwarded unchanged to the BinaryFunctionPass base.
explicit RemoveNops(const cl::opt<bool> &PrintPass)
|
||||
: BinaryFunctionPass(PrintPass) {}
|
||||
|
||||
/// Name under which this pass is reported.
const char *getName() const override { return "remove-nops"; }
|
||||
|
||||
/// Pass entry point
|
||||
void runOnFunctions(BinaryContext &BC) override;
|
||||
};
|
||||
|
||||
enum FrameOptimizationType : char {
|
||||
FOP_NONE, /// Don't perform FOP.
|
||||
FOP_HOT, /// Perform FOP on hot functions.
|
||||
|
|
|
@ -1316,10 +1316,6 @@ bool BinaryFunction::disassemble() {
|
|||
}
|
||||
}
|
||||
|
||||
// Convert instruction to a shorter version that could be relaxed if
|
||||
// needed.
|
||||
MIB->shortenInstruction(Instruction);
|
||||
|
||||
if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
|
||||
uint64_t TargetAddress = 0;
|
||||
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
|
||||
|
@ -1419,6 +1415,13 @@ add_instruction:
|
|||
MIB->addAnnotation(Instruction, "Offset", static_cast<uint32_t>(Offset));
|
||||
}
|
||||
|
||||
if (BC.MIB->isNoop(Instruction)) {
|
||||
// NOTE: disassembly loses the correct size information for noops.
|
||||
// E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
|
||||
// 5 bytes. Preserve the size info using annotations.
|
||||
MIB->addAnnotation(Instruction, "Size", static_cast<uint32_t>(Size));
|
||||
}
|
||||
|
||||
addInstruction(Offset, std::move(Instruction));
|
||||
}
|
||||
|
||||
|
@ -2027,13 +2030,6 @@ bool BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
|||
}
|
||||
}
|
||||
|
||||
// Ignore nops except SDT markers. We use nops to derive alignment of the
|
||||
// next basic block. It will not always work, as some blocks are naturally
|
||||
// aligned, but it's just part of heuristic for block alignment.
|
||||
if (MIB->isNoop(Instr) && !PreserveNops && !IsSDTMarker && !IsLKMarker) {
|
||||
IsLastInstrNop = true;
|
||||
continue;
|
||||
}
|
||||
if (!InsertBB) {
|
||||
// It must be a fallthrough or unreachable code. Create a new block unless
|
||||
// we see an unconditional branch following a conditional one. The latter
|
||||
|
|
|
@ -105,7 +105,6 @@ MinBranchClusters("min-branch-clusters",
|
|||
|
||||
enum PeepholeOpts : char {
|
||||
PEEP_NONE = 0x0,
|
||||
PEEP_SHORTEN = 0x1,
|
||||
PEEP_DOUBLE_JUMPS = 0x2,
|
||||
PEEP_TAILCALL_TRAPS = 0x4,
|
||||
PEEP_USELESS_BRANCHES = 0x8,
|
||||
|
@ -119,7 +118,6 @@ Peepholes("peepholes",
|
|||
cl::value_desc("opt1,opt2,opt3,..."),
|
||||
cl::values(
|
||||
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
|
||||
clEnumValN(PEEP_SHORTEN, "shorten", "perform instruction shortening"),
|
||||
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
|
||||
"remove double jumps when able"),
|
||||
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
|
||||
|
@ -1025,30 +1023,45 @@ void SimplifyConditionalTailCalls::runOnFunctions(BinaryContext &BC) {
|
|||
<< ".\n";
|
||||
}
|
||||
|
||||
uint64_t Peepholes::shortenInstructions(BinaryContext &BC,
|
||||
BinaryFunction &Function) {
|
||||
MCInst DebugInst;
|
||||
uint64_t ShortenInstructions::shortenInstructions(BinaryFunction &Function) {
|
||||
uint64_t Count = 0;
|
||||
const BinaryContext &BC = Function.getBinaryContext();
|
||||
for (BinaryBasicBlock &BB : Function) {
|
||||
for (MCInst &Inst : BB) {
|
||||
if (opts::Verbosity > 1) {
|
||||
DebugInst = Inst;
|
||||
}
|
||||
if (BC.MIB->shortenInstruction(Inst)) {
|
||||
if (opts::Verbosity > 1) {
|
||||
outs() << "BOLT-INFO: peephole, shortening:\n"
|
||||
<< "BOLT-INFO: ";
|
||||
BC.printInstruction(outs(), DebugInst, 0, &Function);
|
||||
outs() << "BOLT-INFO: to:";
|
||||
BC.printInstruction(outs(), Inst, 0, &Function);
|
||||
}
|
||||
++Count;
|
||||
MCInst OriginalInst;
|
||||
if (opts::Verbosity > 2)
|
||||
OriginalInst = Inst;
|
||||
|
||||
if (!BC.MIB->shortenInstruction(Inst))
|
||||
continue;
|
||||
|
||||
if (opts::Verbosity > 2) {
|
||||
outs() << "BOLT-INFO: shortening:\nBOLT-INFO: ";
|
||||
BC.printInstruction(outs(), OriginalInst, 0, &Function);
|
||||
outs() << "BOLT-INFO: to:";
|
||||
BC.printInstruction(outs(), Inst, 0, &Function);
|
||||
}
|
||||
|
||||
++Count;
|
||||
}
|
||||
}
|
||||
|
||||
return Count;
|
||||
}
|
||||
|
||||
/// Pass entry point: run shortenInstructions() over the functions of \p BC via
/// ParallelUtilities::runOnEachFunction and print the accumulated total.
/// Does nothing (and prints nothing) when \p BC is not an X86 context.
void ShortenInstructions::runOnFunctions(BinaryContext &BC) {
|
||||
// Atomic accumulator: it is updated from inside the work lambda handed to
// ParallelUtilities, which presumably may run on several threads.
std::atomic<uint64_t> NumShortened{0};
|
||||
// Only X86 contexts are processed.
if (!BC.isX86())
|
||||
return;
|
||||
|
||||
ParallelUtilities::runOnEachFunction(
|
||||
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR,
|
||||
[&](BinaryFunction &BF) { NumShortened += shortenInstructions(BF); },
|
||||
// nullptr: no skip predicate is supplied for this pass.
nullptr, "ShortenInstructions");
|
||||
|
||||
outs() << "BOLT-INFO: " << NumShortened << " instructions were shortened\n";
|
||||
}
|
||||
|
||||
void Peepholes::addTailcallTraps(BinaryFunction &Function) {
|
||||
MCPlusBuilder *MIB = Function.getBinaryContext().MIB.get();
|
||||
for (BinaryBasicBlock &BB : Function) {
|
||||
|
@ -1099,8 +1112,6 @@ void Peepholes::runOnFunctions(BinaryContext &BC) {
|
|||
for (auto &It : BC.getBinaryFunctions()) {
|
||||
BinaryFunction &Function = It.second;
|
||||
if (shouldOptimize(Function)) {
|
||||
if (Opts & opts::PEEP_SHORTEN)
|
||||
NumShortened += shortenInstructions(BC, Function);
|
||||
if (Opts & opts::PEEP_DOUBLE_JUMPS)
|
||||
NumDoubleJumps += fixDoubleJumps(Function, false);
|
||||
if (Opts & opts::PEEP_TAILCALL_TRAPS)
|
||||
|
@ -1110,9 +1121,7 @@ void Peepholes::runOnFunctions(BinaryContext &BC) {
|
|||
assert(Function.validateCFG());
|
||||
}
|
||||
}
|
||||
outs() << "BOLT-INFO: Peephole: " << NumShortened
|
||||
<< " instructions shortened.\n"
|
||||
<< "BOLT-INFO: Peephole: " << NumDoubleJumps
|
||||
outs() << "BOLT-INFO: Peephole: " << NumDoubleJumps
|
||||
<< " double jumps patched.\n"
|
||||
<< "BOLT-INFO: Peephole: " << TailCallTraps
|
||||
<< " tail call traps inserted.\n"
|
||||
|
@ -1837,5 +1846,30 @@ void SpecializeMemcpy1::runOnFunctions(BinaryContext &BC) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Erase removable nops from every basic block of \p BF.
/// An instruction is erased only when the target reports it as a noop AND it
/// carries the "NOP" annotation; noops without that annotation are kept.
/// NOTE(review): the code that attaches the "NOP" annotation is not visible
/// here.
void RemoveNops::runOnFunction(BinaryFunction &BF) {
|
||||
const BinaryContext &BC = BF.getBinaryContext();
|
||||
for (BinaryBasicBlock &BB : BF) {
|
||||
// Walk from the last index down to 0 so that erasing the entry at I never
// changes the index of an entry that is still to be visited. The signed
// counter lets the loop terminate cleanly for an empty block (I == -1).
for (int64_t I = BB.size() - 1; I >= 0; --I) {
|
||||
MCInst &Inst = BB.getInstructionAtIndex(I);
|
||||
if (BC.MIB->isNoop(Inst) && BC.MIB->hasAnnotation(Inst, "NOP"))
|
||||
BB.eraseInstructionAtIndex(I);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pass entry point: apply runOnFunction() to the functions of \p BC through
/// ParallelUtilities::runOnEachFunction, skipping functions whose
/// shouldPreserveNops() returns true.
void RemoveNops::runOnFunctions(BinaryContext &BC) {
|
||||
// Per-function work item.
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
|
||||
runOnFunction(BF);
|
||||
};
|
||||
|
||||
// Skip predicate: functions that ask to keep their nops are left untouched.
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
|
||||
return BF.shouldPreserveNops();
|
||||
};
|
||||
|
||||
ParallelUtilities::runOnEachFunction(
|
||||
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
|
||||
SkipFunc, "RemoveNops");
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
|
|
@ -732,7 +732,22 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (To != ToBB->getOffset()) {
|
||||
bool OffsetMatches = !!(To == ToBB->getOffset());
|
||||
if (!OffsetMatches) {
|
||||
// Skip the nops to support old .fdata
|
||||
uint64_t Offset = ToBB->getOffset();
|
||||
for (MCInst &Instr : *ToBB) {
|
||||
if (!BC.MIB->isNoop(Instr))
|
||||
break;
|
||||
|
||||
Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
|
||||
}
|
||||
|
||||
if (To == Offset)
|
||||
OffsetMatches = true;
|
||||
}
|
||||
|
||||
if (!OffsetMatches) {
|
||||
// "To" could be referring to nop instructions in between 2 basic blocks.
|
||||
// While building the CFG we make sure these nops are attributed to the
|
||||
// previous basic block, thus we check if the destination belongs to the
|
||||
|
|
|
@ -408,6 +408,10 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
|
|||
|
||||
Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
|
||||
|
||||
Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
|
||||
|
||||
Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
.globl main
|
||||
main:
|
||||
subq $0x2000000, (%r13)
|
||||
.byte 0x49,0x81,0x6d,0x00,0x02,0x00,0x00,0x00
|
||||
xorq %rax,%rax
|
||||
ret
|
|
@ -13,15 +13,14 @@ RUN: -funcs=main,SolveCubic,usqrt -sequential-disassembly 2>&1 | FileCheck %s
|
|||
CHECK: Binary Function "main"
|
||||
CHECK: BB Layout : .LBB00, .Ltmp1, .Ltmp0, .Ltmp3, .Ltmp2, .Ltmp5, .Ltmp4, .Ltmp7, .Ltmp6, .Ltmp9, .Ltmp8, .Ltmp11, .Ltmp10, .Ltmp13, .Ltmp12, .Ltmp15, .Ltmp14, .Ltmp21, .Ltmp20, .Ltmp19, .Ltmp18, .Ltmp17, .Ltmp16, .LFT0, .LFT1, .LFT2, .LFT3, .Ltmp22, .LFT4, .Ltmp23, .LFT5, .Ltmp24, .LFT6, .Ltmp25, .LFT7
|
||||
CHECK: Binary Function "SolveCubic"
|
||||
CHECK: BB Layout : .LBB01, .LFT8, .LFT9, .Ltmp33, .LFT10, .Ltmp28, .Ltmp26, .LFT11, .Ltmp37, .Ltmp36, .Ltmp35, .Ltmp34, .Ltmp27, .Ltmp32, .Ltmp31, .Ltmp30, .Ltmp29
|
||||
CHECK: BB Layout : .LBB01, .LFT8, .LFT9, .Ltmp33, .LFT10, .Ltmp28, .LFT11, .Ltmp26, .LFT12, .Ltmp37, .Ltmp36, .Ltmp35, .Ltmp34, .Ltmp27, .Ltmp32, .Ltmp31, .Ltmp30, .Ltmp29
|
||||
CHECK: Binary Function "usqrt"
|
||||
CHECK: BB Layout : .LBB02, .Ltmp39, .LFT12, .Ltmp38, .LFT13
|
||||
CHECK: BB Layout : .LBB02, .Ltmp39, .LFT13, .Ltmp38, .LFT14
|
||||
|
||||
# New order
|
||||
CHECK: Binary Function "main"
|
||||
CHECK: BB Layout : .LBB00, .Ltmp1, .Ltmp0, .Ltmp3, .Ltmp2, .Ltmp5, .Ltmp4, .Ltmp7, .Ltmp6, .Ltmp9, .Ltmp8, .Ltmp11, .Ltmp10, .Ltmp13, .Ltmp12, .Ltmp15, .Ltmp14, .Ltmp21, .Ltmp16, .Ltmp18, .Ltmp17, .LFT0, .Ltmp19, .LFT1, .Ltmp20, .LFT2, .LFT3, .Ltmp22, .LFT4, .Ltmp23, .LFT5, .Ltmp24, .LFT6, .Ltmp25, .LFT7
|
||||
CHECK: Binary Function "SolveCubic"
|
||||
CHECK: BB Layout : .LBB01, .Ltmp26, .LFT11, .Ltmp37, .Ltmp36, .Ltmp35, .Ltmp34, .LFT8, .LFT9, .Ltmp33, .Ltmp28, .LFT10, .Ltmp27, .Ltmp32, .Ltmp31, .Ltmp30, .Ltmp29
|
||||
CHECK: BB Layout : .LBB01, .Ltmp26, .LFT12, .Ltmp37, .Ltmp36, .Ltmp35, .Ltmp34, .LFT8, .LFT9, .Ltmp33, .Ltmp28, .LFT10, .Ltmp27, .Ltmp32, .Ltmp31, .Ltmp30, .Ltmp29
|
||||
CHECK: Binary Function "usqrt"
|
||||
CHECK: BB Layout : .LBB02, .Ltmp38, .Ltmp39, .LFT12, .LFT13
|
||||
|
||||
CHECK: BB Layout : .LBB02, .Ltmp38, .Ltmp39, .LFT13, .LFT14
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
# Checks that peephole pass works.
|
||||
|
||||
RUN: %clang %p/Inputs/peephole.s -o %t.exe
|
||||
RUN: llvm-bolt %t.exe -o %t -peepholes=shorten
|
||||
RUN: llvm-objdump -d --disassemble-symbols=main %t | FileCheck %s
|
||||
|
||||
CHECK: main
|
||||
CHECK-NEXT: 49 81 6d 00 00 00 00 02 subq $33554432, (%r13)
|
||||
CHECK-NEXT: 49 83 6d 00 02 subq $2, (%r13)
|
||||
CHECK-NEXT: 48 31 c0 xorq %rax, %rax
|
||||
CHECK-NEXT: c3 retq
|
|
@ -23,7 +23,7 @@ PERF2BOLT: 1 usqrt 30 1 usqrt 39 4 33
|
|||
PERF2BOLT: 1 usqrt 35 1 usqrt 39 0 22
|
||||
PERF2BOLT: 1 usqrt 3d 1 usqrt 10 0 58
|
||||
PERF2BOLT: 1 usqrt 3d 1 usqrt 3f 0 22
|
||||
PERF2BOLT: 1 usqrt 8 1 usqrt 10 0 22
|
||||
PERF2BOLT: 1 usqrt a 1 usqrt 10 0 22
|
||||
|
||||
NEWFORMAT: - name: usqrt
|
||||
NEWFORMAT: fid: 7
|
||||
|
@ -31,7 +31,7 @@ NEWFORMAT: exec: 0
|
|||
NEWFORMAT: nblocks: 5
|
||||
NEWFORMAT: blocks:
|
||||
NEWFORMAT: - bid: 0
|
||||
NEWFORMAT: insns: 3
|
||||
NEWFORMAT: insns: 4
|
||||
NEWFORMAT: succ: [ { bid: 1, cnt: 22 } ]
|
||||
NEWFORMAT: - bid: 1
|
||||
NEWFORMAT: insns: 9
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# that fit in 32 bits are shortened.
|
||||
|
||||
RUN: %clang %p/Inputs/asm_main.c %p/Inputs/shorten_mov.s -o %t.exe
|
||||
RUN: llvm-bolt %t.exe -peepholes=shorten -o %t
|
||||
RUN: llvm-bolt %t.exe -o %t
|
||||
RUN: llvm-objdump -d %t --print-imm-hex | FileCheck %s
|
||||
|
||||
CHECK: <foo>:
|
||||
|
|
Loading…
Reference in New Issue