forked from OSchip/llvm-project
[BOLT] Fix AND evaluation bug in shrink wrapping
Fix a bug where shrink-wrapping would use wrong stack offsets because the stack was being aligned with an AND instruction, hence, making its true offsets only available during runtime (we can't statically determine where are the stack elements and we must give up on this case). Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D126110
This commit is contained in:
parent
98ff205fd8
commit
c09cd64e5c
|
@ -909,12 +909,22 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Use \p Input1 or Input2 as the current value for the input register and
|
||||
/// put in \p Output the changes incurred by executing \p Inst. Return false
|
||||
/// if it was not possible to perform the evaluation.
|
||||
virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output,
|
||||
std::pair<MCPhysReg, int64_t> Input1,
|
||||
std::pair<MCPhysReg, int64_t> Input2) const {
|
||||
/// Use \p Input1 or Input2 as the current value for the input
|
||||
/// register and put in \p Output the changes incurred by executing
|
||||
/// \p Inst. Return false if it was not possible to perform the
|
||||
/// evaluation. evaluateStackOffsetExpr is restricted to operations
|
||||
/// that have associativity with addition. Its intended usage is for
|
||||
/// evaluating stack offset changes. In these cases, expressions
|
||||
/// appear in the form of (x + offset) OP constant, where x is an
|
||||
/// unknown base (such as stack base) but offset and constant are
|
||||
/// known. In these cases, \p Output represents the new stack offset
|
||||
/// after executing \p Inst. Because we don't know x, we can't
|
||||
/// evaluate operations such as multiply or AND/OR, e.g. (x +
|
||||
/// offset) OP constant is not the same as x + (offset OP constant).
|
||||
virtual bool
|
||||
evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
|
||||
std::pair<MCPhysReg, int64_t> Input1,
|
||||
std::pair<MCPhysReg, int64_t> Input2) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -115,7 +115,7 @@ protected:
|
|||
else
|
||||
FP = std::make_pair(0, 0);
|
||||
int64_t Output;
|
||||
if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
|
||||
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
|
||||
if (SPVal == EMPTY && FPVal == EMPTY)
|
||||
return SPVal;
|
||||
return SUPERPOSITION;
|
||||
|
@ -150,7 +150,7 @@ protected:
|
|||
else
|
||||
SP = std::make_pair(0, 0);
|
||||
int64_t Output;
|
||||
if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
|
||||
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
|
||||
if (SPVal == EMPTY && FPVal == EMPTY)
|
||||
return FPVal;
|
||||
return SUPERPOSITION;
|
||||
|
|
|
@ -29,9 +29,9 @@ namespace {
|
|||
|
||||
bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst,
|
||||
int64_t &Adjustment) {
|
||||
return BC.MIB->evaluateSimple(Inst, Adjustment,
|
||||
std::make_pair(BC.MIB->getStackPointer(), 0LL),
|
||||
std::make_pair(0, 0LL));
|
||||
return BC.MIB->evaluateStackOffsetExpr(
|
||||
Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL),
|
||||
std::make_pair(0, 0LL));
|
||||
}
|
||||
|
||||
bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) {
|
||||
|
|
|
@ -246,7 +246,7 @@ void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
|
|||
SP = std::make_pair(0, 0);
|
||||
|
||||
int64_t Output;
|
||||
if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
|
||||
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
|
||||
return;
|
||||
|
||||
// Not your regular frame pointer initialization... bail
|
||||
|
@ -294,7 +294,7 @@ void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
|
|||
SP = std::make_pair(0, 0);
|
||||
|
||||
int64_t Output;
|
||||
if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
|
||||
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
|
||||
return;
|
||||
|
||||
// If the value is the same of FP, no need to adjust it
|
||||
|
|
|
@ -134,7 +134,7 @@ BitVector StackAllocationAnalysis::computeNext(const MCInst &Point,
|
|||
else
|
||||
FP = std::make_pair(0, 0);
|
||||
int64_t Output;
|
||||
if (!MIB->evaluateSimple(Point, Output, SP, FP))
|
||||
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
|
||||
return Next;
|
||||
|
||||
if (SPOffset < Output) {
|
||||
|
|
|
@ -261,8 +261,8 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const {
|
|||
int64_t Output;
|
||||
std::pair<MCPhysReg, int64_t> Input1 = std::make_pair(Reg, 0);
|
||||
std::pair<MCPhysReg, int64_t> Input2 = std::make_pair(0, 0);
|
||||
if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) {
|
||||
LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n");
|
||||
if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) {
|
||||
LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (Offset + Output < 0 ||
|
||||
|
|
|
@ -1244,9 +1244,10 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
bool evaluateSimple(const MCInst &Inst, int64_t &Output,
|
||||
std::pair<MCPhysReg, int64_t> Input1,
|
||||
std::pair<MCPhysReg, int64_t> Input2) const override {
|
||||
bool
|
||||
evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
|
||||
std::pair<MCPhysReg, int64_t> Input1,
|
||||
std::pair<MCPhysReg, int64_t> Input2) const override {
|
||||
|
||||
auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr<int64_t> {
|
||||
if (Reg == Input1.first)
|
||||
|
@ -1260,16 +1261,6 @@ public:
|
|||
default:
|
||||
return false;
|
||||
|
||||
case X86::AND64ri32:
|
||||
case X86::AND64ri8:
|
||||
if (!Inst.getOperand(2).isImm())
|
||||
return false;
|
||||
if (ErrorOr<int64_t> InputVal =
|
||||
getOperandVal(Inst.getOperand(1).getReg()))
|
||||
Output = *InputVal & Inst.getOperand(2).getImm();
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
case X86::SUB64ri32:
|
||||
case X86::SUB64ri8:
|
||||
if (!Inst.getOperand(2).isImm())
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
# This checks that shrink wrapping does attempt at accessing stack elements
|
||||
# using RSP when the function is aligning RSP and changing offsets.
|
||||
|
||||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: link_fdata %s %t.o %t.fdata
|
||||
# RUN: llvm-strip --strip-unneeded %t.o
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
|
||||
# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \
|
||||
# RUN: -frame-opt=all -simplify-conditional-tail-calls=false \
|
||||
# RUN: -eliminate-unreachable=false | FileCheck %s
|
||||
|
||||
# Here we have a function that aligns the stack at prologue. Stack pointer
|
||||
# analysis can't try to infer offset positions after AND because that depends
|
||||
# on the runtime value of the stack pointer of callee (whether it is misaligned
|
||||
# or not).
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
.cfi_startproc
|
||||
# FDATA: 0 [unknown] 0 1 _start 0 0 1
|
||||
push %rbp
|
||||
mov %rsp, %rbp
|
||||
push %rbx
|
||||
push %r14
|
||||
and $0xffffffffffffffe0,%rsp
|
||||
subq $0x20, %rsp
|
||||
b: je hot_path
|
||||
# FDATA: 1 _start #b# 1 _start #hot_path# 0 1
|
||||
cold_path:
|
||||
mov %r14, %rdi
|
||||
mov %rbx, %rdi
|
||||
# Block push-pop mode by using an instruction that requires the
|
||||
# stack to be aligned to 128B. This will force the pass
|
||||
# to try to index stack elements by using RSP +offset directly, but
|
||||
# we do not know how to access individual elements of the stack thanks
|
||||
# to the alignment.
|
||||
movdqa %xmm8, (%rsp)
|
||||
addq $0x20, %rsp
|
||||
pop %r14
|
||||
pop %rbx
|
||||
pop %rbp
|
||||
ret
|
||||
hot_path:
|
||||
addq $0x20, %rsp
|
||||
pop %r14
|
||||
pop %rbx
|
||||
pop %rbp
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _start, .-_start
|
||||
|
||||
# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops
|
Loading…
Reference in New Issue