forked from OSchip/llvm-project
[BOLT] Add aarch64 support for peephole passes
Enable peephole optimizations for AArch64. Also includes a small refactoring: the PeepholeOpts enum is moved into the Peepholes class. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei. Differential Revision: https://reviews.llvm.org/D118732
This commit is contained in:
parent
2c26cfdef7
commit
19fb5a210d
|
@ -295,6 +295,16 @@ public:
|
|||
|
||||
/// Perform simple peephole optimizations.
|
||||
class Peepholes : public BinaryFunctionPass {
|
||||
public:
|
||||
/// Selects which peephole optimizations to run. The enumerators are bit
/// flags: the selected values are OR-ed into a single mask and individual
/// optimizations are tested with `&`.
enum PeepholeOpts : char {
|
||||
PEEP_NONE = 0x0, ///< Disable peepholes.
|
||||
PEEP_DOUBLE_JUMPS = 0x2, ///< Remove double jumps when able.
|
||||
PEEP_TAILCALL_TRAPS = 0x4, ///< Insert tail call traps.
|
||||
PEEP_USELESS_BRANCHES = 0x8, ///< Remove useless conditional branches.
|
||||
// Enables every flag above. NOTE(review): the mask also covers bit 0x1,
// which no enumerator in this enum uses.
PEEP_ALL = 0xf
|
||||
};
|
||||
|
||||
private:
|
||||
uint64_t NumDoubleJumps{0};
|
||||
uint64_t TailCallTraps{0};
|
||||
uint64_t NumUselessCondBranches{0};
|
||||
|
|
|
@ -105,29 +105,19 @@ MinBranchClusters("min-branch-clusters",
|
|||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
enum PeepholeOpts : char {
|
||||
PEEP_NONE = 0x0,
|
||||
PEEP_DOUBLE_JUMPS = 0x2,
|
||||
PEEP_TAILCALL_TRAPS = 0x4,
|
||||
PEEP_USELESS_BRANCHES = 0x8,
|
||||
PEEP_ALL = 0xf
|
||||
};
|
||||
|
||||
static cl::list<PeepholeOpts>
|
||||
Peepholes("peepholes",
|
||||
cl::CommaSeparated,
|
||||
cl::desc("enable peephole optimizations"),
|
||||
cl::value_desc("opt1,opt2,opt3,..."),
|
||||
cl::values(
|
||||
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
|
||||
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
|
||||
"remove double jumps when able"),
|
||||
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
|
||||
clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
|
||||
"remove useless conditional branches"),
|
||||
clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
// Command-line option "peepholes": a comma-separated list of peephole
// optimizations to enable, which may be given zero or more times. Each
// accepted name maps to one Peepholes::PeepholeOpts enumerator.
static cl::list<Peepholes::PeepholeOpts> Peepholes(
|
||||
"peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
|
||||
cl::value_desc("opt1,opt2,opt3,..."),
|
||||
cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
|
||||
clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps",
|
||||
"remove double jumps when able"),
|
||||
clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps",
|
||||
"insert tail call traps"),
|
||||
clEnumValN(Peepholes::PEEP_USELESS_BRANCHES, "useless-branches",
|
||||
"remove useless conditional branches"),
|
||||
clEnumValN(Peepholes::PEEP_ALL, "all",
|
||||
"enable all peephole optimizations")),
|
||||
cl::ZeroOrMore, cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
PrintFuncStat("print-function-statistics",
|
||||
|
@ -1092,20 +1082,20 @@ void Peepholes::removeUselessCondBranches(BinaryFunction &Function) {
|
|||
}
|
||||
|
||||
void Peepholes::runOnFunctions(BinaryContext &BC) {
|
||||
const char Opts = std::accumulate(
|
||||
opts::Peepholes.begin(), opts::Peepholes.end(), 0,
|
||||
[](const char A, const opts::PeepholeOpts B) { return A | B; });
|
||||
if (Opts == opts::PEEP_NONE || !BC.isX86())
|
||||
const char Opts =
|
||||
std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0,
|
||||
[](const char A, const PeepholeOpts B) { return A | B; });
|
||||
if (Opts == PEEP_NONE)
|
||||
return;
|
||||
|
||||
for (auto &It : BC.getBinaryFunctions()) {
|
||||
BinaryFunction &Function = It.second;
|
||||
if (shouldOptimize(Function)) {
|
||||
if (Opts & opts::PEEP_DOUBLE_JUMPS)
|
||||
if (Opts & PEEP_DOUBLE_JUMPS)
|
||||
NumDoubleJumps += fixDoubleJumps(Function, false);
|
||||
if (Opts & opts::PEEP_TAILCALL_TRAPS)
|
||||
if (Opts & PEEP_TAILCALL_TRAPS)
|
||||
addTailcallTraps(Function);
|
||||
if (Opts & opts::PEEP_USELESS_BRANCHES)
|
||||
if (Opts & PEEP_USELESS_BRANCHES)
|
||||
removeUselessCondBranches(Function);
|
||||
assert(Function.validateCFG());
|
||||
}
|
||||
|
|
|
@ -798,6 +798,13 @@ public:
|
|||
createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
|
||||
}
|
||||
|
||||
/// Overwrite \p Inst with a trap instruction: the instruction is cleared,
/// its opcode is set to AArch64::BRK and a single immediate operand with
/// value 1 is appended.
/// \returns true unconditionally.
bool createTrap(MCInst &Inst) const override {
|
||||
Inst.clear();
|
||||
Inst.setOpcode(AArch64::BRK);
|
||||
// Immediate operand of the BRK instruction.
Inst.addOperand(MCOperand::createImm(1));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool convertJmpToTailCall(MCInst &Inst) override {
|
||||
setTailCall(Inst);
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
// A contrived example to test the double jump removal peephole.
|
||||
|
||||
// RUN: %clang %cflags -O0 %s -o %t.exe
|
||||
// RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=double-jumps | \
|
||||
// RUN: FileCheck %s -check-prefix=CHECKBOLT
|
||||
// RUN: llvm-objdump -d %t.bolt | FileCheck %s
|
||||
|
||||
// CHECKBOLT: BOLT-INFO: Peephole: 1 double jumps patched.
|
||||
|
||||
// CHECK: <_Z3foom>:
|
||||
// CHECK-NEXT: sub sp, sp, #16
|
||||
// CHECK-NEXT: str x0, [sp, #8]
|
||||
// CHECK-NEXT: ldr [[REG:x[0-9]+]], [sp, #8]
|
||||
// CHECK-NEXT: cmp [[REG]], #0
|
||||
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x34>
|
||||
// CHECK-NEXT: add [[REG]], [[REG]], #1
|
||||
// CHECK-NEXT: add [[REG]], [[REG]], #1
|
||||
// CHECK-NEXT: cmp [[REG]], #2
|
||||
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x28>
|
||||
// CHECK-NEXT: add [[REG]], [[REG]], #1
|
||||
// CHECK-NEXT: mov [[REG]], x1
|
||||
// CHECK-NEXT: ldr x1, [sp]
|
||||
// CHECK-NEXT: b {{.*}} <bar>
|
||||
// CHECK-NEXT: ldr x1, [sp]
|
||||
// CHECK-NEXT: add [[REG]], [[REG]], #1
|
||||
// CHECK-NEXT: b {{.*}} <bar>
|
||||
|
||||
// Returns its argument incremented by one. Declared extern "C" so that the
// inline assembly in this file can branch to the symbol by the plain name
// `bar`.
extern "C" unsigned long bar(unsigned long count) {
  const unsigned long Next = count + 1;
  return Next;
}
|
||||
|
||||
// Contrived control flow for the double-jump peephole: the inline assembly
// contains branches whose target is itself an unconditional branch
// (`b .L1` -> `.L1: b .L2`, `b.ne .L3` -> `.L3: b .L5`,
// `b .L6` -> `.L6: b bar`). Every path through the assembly ends in the
// branch to `bar`.
// NOTE(review): the assembly writes %0, x1 and the condition flags but
// declares no outputs and no clobbers; presumably tolerated because this test
// is built at -O0 - confirm before reusing the pattern elsewhere.
unsigned long foo(unsigned long count) {
|
||||
asm volatile(" cmp %0,#0\n"
|
||||
" b.eq .L7\n"
|
||||
" add %0, %0, #1\n"
|
||||
" b .L1\n"
|
||||
".L1: b .L2\n"
|
||||
".L2: add %0, %0, #1\n"
|
||||
" cmp %0, #2\n"
|
||||
" b.ne .L3\n"
|
||||
" b .L4\n"
|
||||
".L3: b .L5\n"
|
||||
".L5: add %0, %0, #1\n"
|
||||
".L4: mov %0,x1\n"
|
||||
" ldr x1, [sp]\n"
|
||||
" b .L6\n"
|
||||
".L7: ldr x1, [sp]\n"
|
||||
" add %0, %0, #1\n"
|
||||
" b .L6\n"
|
||||
".L6: b bar\n"
|
||||
// No output operands.
:
|
||||
// Single input operand %0: `count`, held in a general-purpose register.
: "r"(count)
|
||||
// No clobbers declared.
:);
|
||||
return count;
|
||||
}
|
||||
|
||||
// Calls foo(38) and returns its result converted to int as the exit status.
int main(int argc, const char *argv[]) {
  const unsigned long Result = foo(38);
  return Result;
}
|
|
@ -0,0 +1,37 @@
|
|||
## Tests the peephole that adds trap instructions following indirect tail calls.
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=tailcall-traps \
|
||||
# RUN: -print-peepholes -funcs=foo,bar 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK: Binary Function "foo"
|
||||
# CHECK: br x0 # TAILCALL
|
||||
# CHECK-NEXT: brk #0x1
|
||||
# CHECK: End of Function "foo"
|
||||
|
||||
# CHECK: Binary Function "bar"
|
||||
# CHECK: b foo # TAILCALL
|
||||
# CHECK: End of Function "bar"
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.global main
|
||||
.type main, %function
|
||||
## main: a single nop followed by a return.
main:
|
||||
nop
|
||||
ret
|
||||
.size main, .-main
|
||||
|
||||
.global foo
|
||||
.type foo, %function
|
||||
## foo: consists only of an indirect branch through x0. The expectations at
## the top of this file look for this branch annotated as a tail call and
## immediately followed by a `brk #0x1` trap.
foo:
|
||||
br x0
|
||||
.size foo, .-foo
|
||||
|
||||
.global bar
|
||||
.type bar, %function
|
||||
## bar: consists only of a direct branch to foo. The expectations at the top
## of this file look for this branch annotated as a tail call.
bar:
|
||||
b foo
|
||||
.size bar, .-bar
|
|
@ -1,6 +1,4 @@
|
|||
/*
|
||||
* A contrived example to test the double jump removal peephole.
|
||||
*/
|
||||
// A contrived example to test the double jump removal peephole.
|
||||
|
||||
extern "C" unsigned long bar(unsigned long count) {
|
||||
return count + 1;
|
||||
|
|
Loading…
Reference in New Issue