forked from OSchip/llvm-project
[BOLT-X86] Fix instrumentation issue with indirect calls
Summary: Indirect calls that use RSP to compute the target address would break in instrumentation mode because we were adding instructions that changed the stack pointer. Fix this. (cherry picked from FBD20883791)
This commit is contained in:
parent
401fa5b493
commit
6dbd15bc01
|
@ -276,6 +276,9 @@ bool Instrumentation::instrumentOneTarget(
|
|||
void Instrumentation::instrumentFunction(BinaryContext &BC,
|
||||
BinaryFunction &Function,
|
||||
MCPlusBuilder::AllocatorIdTy AllocId) {
|
||||
if (Function.hasUnknownControlFlow())
|
||||
return;
|
||||
|
||||
SplitWorklistTy SplitWorklist;
|
||||
SplitInstrsTy SplitInstrs;
|
||||
|
||||
|
|
|
@ -3186,28 +3186,57 @@ public:
|
|||
createInstrumentedIndirectCall(const MCInst &CallInst, bool TailCall,
|
||||
MCSymbol *HandlerFuncAddr, int CallSiteID,
|
||||
MCContext *Ctx) const override {
|
||||
std::vector<MCInst> Insts(6);
|
||||
// Check if the target address expression used in the original indirect call
|
||||
// uses the stack pointer, which we are going to clobber.
|
||||
static BitVector SPAliases(getAliases(X86::RSP));
|
||||
bool UsesSP{false};
|
||||
// Skip defs.
|
||||
for (unsigned I = Info->get(CallInst.getOpcode()).getNumDefs(),
|
||||
E = MCPlus::getNumPrimeOperands(CallInst); I != E; ++I) {
|
||||
const auto &Operand = CallInst.getOperand(I);
|
||||
if (Operand.isReg() && SPAliases[Operand.getReg()]) {
|
||||
UsesSP = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<MCInst> Insts;
|
||||
MCPhysReg TempReg = getIntArgRegister(0);
|
||||
// Code sequence used to enter indirect call instrumentation helper:
|
||||
// push %rdi
|
||||
// add $8, %rsp ;; $rsp may be used in target, so fix it to prev val
|
||||
// movq target, %rdi ;; via convertIndirectCallToLoad
|
||||
// sub $8, %rsp ;; restore correct stack value
|
||||
// push %rdi
|
||||
// movq $CallSiteID, %rdi
|
||||
// push %rdi
|
||||
// callq/jmp *HandlerFuncAddr
|
||||
createPushRegister(Insts[0], TempReg, 8);
|
||||
Insts[1] = CallInst;
|
||||
convertIndirectCallToLoad(Insts[1], TempReg);
|
||||
createPushRegister(Insts[2], TempReg, 8);
|
||||
createLoadImmediate(Insts[3], TempReg, CallSiteID);
|
||||
createPushRegister(Insts[4], TempReg, 8);
|
||||
createIndirectCall(Insts[5], HandlerFuncAddr, Ctx,
|
||||
Insts.emplace_back();
|
||||
createPushRegister(Insts.back(), TempReg, 8);
|
||||
if (UsesSP) { // Only adjust SP if we really need to
|
||||
Insts.emplace_back();
|
||||
createStackPointerDecrement(Insts.back(), 8, /*NoFlagsClobber=*/false);
|
||||
}
|
||||
Insts.emplace_back(CallInst);
|
||||
convertIndirectCallToLoad(Insts.back(), TempReg);
|
||||
if (UsesSP) {
|
||||
Insts.emplace_back();
|
||||
createStackPointerIncrement(Insts.back(), 8, /*NoFlagsClobber=*/false);
|
||||
}
|
||||
Insts.emplace_back();
|
||||
createPushRegister(Insts.back(), TempReg, 8);
|
||||
Insts.emplace_back();
|
||||
createLoadImmediate(Insts.back(), TempReg, CallSiteID);
|
||||
Insts.emplace_back();
|
||||
createPushRegister(Insts.back(), TempReg, 8);
|
||||
Insts.emplace_back();
|
||||
createIndirectCall(Insts.back(), HandlerFuncAddr, Ctx,
|
||||
/*TailCall=*/TailCall);
|
||||
// Carry over metadata
|
||||
for (int I = MCPlus::getNumPrimeOperands(CallInst),
|
||||
E = CallInst.getNumOperands();
|
||||
I != E; ++I) {
|
||||
Insts[5].addOperand(CallInst.getOperand(I));
|
||||
Insts.back().addOperand(CallInst.getOperand(I));
|
||||
}
|
||||
return Insts;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
# This reproduces a bug with instrumentation when trying to count calls
|
||||
# when the target address is computed with a reference to the stack pointer.
|
||||
# Our instrumentation code uses the stack to save registers to be
|
||||
# transparent with the instrumented code, but we end up updating the stack
|
||||
# pointer while doing so, which affects this target address calculation.
|
||||
# The solution is to temporarily fix RSP. Check that we correctly instrument
|
||||
# these cases.
|
||||
|
||||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: %host_cc %t.o -o %t.exe -Wl,-q
|
||||
|
||||
# RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
|
||||
# RUN: -o %t.instrumented
|
||||
|
||||
# Instrumented program needs to finish returning zero
|
||||
# RUN: %t.instrumented arg1 arg2
|
||||
|
||||
# Test that the instrumented data makes sense
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
|
||||
# RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \
|
||||
# RUN: -print-only=main -print-finalized | FileCheck %s
|
||||
|
||||
# RUN: %t.bolted arg1 arg2
|
||||
|
||||
# Check that our indirect call has 1 hit recorded in the fdata file and that
|
||||
# this was processed correctly by BOLT
|
||||
# CHECK: callq *0x18(%rsp) # CallProfile: 1 (0 misses) :
|
||||
# CHECK-NEXT: { targetFunc: 1 (0 misses) }
|
||||
|
||||
.text
|
||||
.globl main
|
||||
.type main, %function
|
||||
.p2align 4
|
||||
main:
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
leaq targetFunc, %rax
|
||||
pushq %rax # We save the target function address in the stack
|
||||
subq $0x18, %rsp # Set up a dummy stack frame
|
||||
cmpl $0x2, %edi
|
||||
jb .LBBerror # Add control flow so we don't have a trivial case
|
||||
.LBB2:
|
||||
callq *0x18(%rsp) # Indirect call using %rsp
|
||||
addq $0x20, %rsp
|
||||
movq %rbp, %rsp
|
||||
pop %rbp
|
||||
retq
|
||||
|
||||
.LBBerror:
|
||||
addq $0x20, %rsp
|
||||
movq %rbp, %rsp
|
||||
pop %rbp
|
||||
movq $1, %rax # Finish with an error if we go this path
|
||||
retq
|
||||
.size main, .-main
|
||||
|
||||
.globl targetFunc
|
||||
.type targetFunc, %function
|
||||
.p2align 4
|
||||
targetFunc:
|
||||
xorq %rax, %rax
|
||||
retq
|
||||
.size targetFunc, .-targetFunc
|
Loading…
Reference in New Issue