[BOLT] Fix AND evaluation bug in shrink wrapping

Fix a bug where shrink-wrapping would use wrong stack offsets
because the stack was being aligned with an AND instruction, hence,
making its true offsets only available during runtime (we can't
statically determine where are the stack elements and we must give up
on this case).

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D126110
This commit is contained in:
Rafael Auler 2022-05-20 17:42:58 -07:00
parent 98ff205fd8
commit c09cd64e5c
8 changed files with 85 additions and 29 deletions

View File

@ -909,12 +909,22 @@ public:
return false;
}
/// Use \p Input1 or Input2 as the current value for the input register and
/// put in \p Output the changes incurred by executing \p Inst. Return false
/// if it was not possible to perform the evaluation.
virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output,
std::pair<MCPhysReg, int64_t> Input1,
std::pair<MCPhysReg, int64_t> Input2) const {
/// Use \p Input1 or Input2 as the current value for the input
/// register and put in \p Output the changes incurred by executing
/// \p Inst. Return false if it was not possible to perform the
/// evaluation. evaluateStackOffsetExpr is restricted to operations
/// that have associativity with addition. Its intended usage is for
/// evaluating stack offset changes. In these cases, expressions
/// appear in the form of (x + offset) OP constant, where x is an
/// unknown base (such as stack base) but offset and constant are
/// known. In these cases, \p Output represents the new stack offset
/// after executing \p Inst. Because we don't know x, we can't
/// evaluate operations such as multiply or AND/OR, e.g. (x +
/// offset) OP constant is not the same as x + (offset OP constant).
virtual bool
evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
std::pair<MCPhysReg, int64_t> Input1,
std::pair<MCPhysReg, int64_t> Input2) const {
llvm_unreachable("not implemented");
return false;
}

View File

@ -115,7 +115,7 @@ protected:
else
FP = std::make_pair(0, 0);
int64_t Output;
if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
if (SPVal == EMPTY && FPVal == EMPTY)
return SPVal;
return SUPERPOSITION;
@ -150,7 +150,7 @@ protected:
else
SP = std::make_pair(0, 0);
int64_t Output;
if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
if (SPVal == EMPTY && FPVal == EMPTY)
return FPVal;
return SUPERPOSITION;

View File

@ -29,9 +29,9 @@ namespace {
bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst,
int64_t &Adjustment) {
return BC.MIB->evaluateSimple(Inst, Adjustment,
std::make_pair(BC.MIB->getStackPointer(), 0LL),
std::make_pair(0, 0LL));
return BC.MIB->evaluateStackOffsetExpr(
Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL),
std::make_pair(0, 0LL));
}
bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) {

View File

@ -246,7 +246,7 @@ void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
SP = std::make_pair(0, 0);
int64_t Output;
if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
return;
// Not your regular frame pointer initialization... bail
@ -294,7 +294,7 @@ void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
SP = std::make_pair(0, 0);
int64_t Output;
if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
return;
// If the value is the same of FP, no need to adjust it

View File

@ -134,7 +134,7 @@ BitVector StackAllocationAnalysis::computeNext(const MCInst &Point,
else
FP = std::make_pair(0, 0);
int64_t Output;
if (!MIB->evaluateSimple(Point, Output, SP, FP))
if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
return Next;
if (SPOffset < Output) {

View File

@ -261,8 +261,8 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const {
int64_t Output;
std::pair<MCPhysReg, int64_t> Input1 = std::make_pair(Reg, 0);
std::pair<MCPhysReg, int64_t> Input2 = std::make_pair(0, 0);
if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) {
LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n");
if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) {
LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n");
return false;
}
if (Offset + Output < 0 ||

View File

@ -1244,9 +1244,10 @@ public:
return false;
}
bool evaluateSimple(const MCInst &Inst, int64_t &Output,
std::pair<MCPhysReg, int64_t> Input1,
std::pair<MCPhysReg, int64_t> Input2) const override {
bool
evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
std::pair<MCPhysReg, int64_t> Input1,
std::pair<MCPhysReg, int64_t> Input2) const override {
auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr<int64_t> {
if (Reg == Input1.first)
@ -1260,16 +1261,6 @@ public:
default:
return false;
case X86::AND64ri32:
case X86::AND64ri8:
if (!Inst.getOperand(2).isImm())
return false;
if (ErrorOr<int64_t> InputVal =
getOperandVal(Inst.getOperand(1).getReg()))
Output = *InputVal & Inst.getOperand(2).getImm();
else
return false;
break;
case X86::SUB64ri32:
case X86::SUB64ri8:
if (!Inst.getOperand(2).isImm())

View File

@ -0,0 +1,55 @@
# This checks that shrink wrapping does attempt at accessing stack elements
# using RSP when the function is aligning RSP and changing offsets.
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \
# RUN: -frame-opt=all -simplify-conditional-tail-calls=false \
# RUN: -eliminate-unreachable=false | FileCheck %s
# Here we have a function that aligns the stack at prologue. Stack pointer
# analysis can't try to infer offset positions after AND because that depends
# on the runtime value of the stack pointer of callee (whether it is misaligned
# or not).
.globl _start
.type _start, %function
_start:
.cfi_startproc
# FDATA: 0 [unknown] 0 1 _start 0 0 1
push %rbp
mov %rsp, %rbp
push %rbx
push %r14
and $0xffffffffffffffe0,%rsp
subq $0x20, %rsp
b: je hot_path
# FDATA: 1 _start #b# 1 _start #hot_path# 0 1
cold_path:
mov %r14, %rdi
mov %rbx, %rdi
# Block push-pop mode by using an instruction that requires the
# stack to be aligned to 128B. This will force the pass
# to try to index stack elements by using RSP +offset directly, but
# we do not know how to access individual elements of the stack thanks
# to the alignment.
movdqa %xmm8, (%rsp)
addq $0x20, %rsp
pop %r14
pop %rbx
pop %rbp
ret
hot_path:
addq $0x20, %rsp
pop %r14
pop %rbx
pop %rbp
ret
.cfi_endproc
.size _start, .-_start
# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops