From c09cd64e5c6dea6e97ef7d6cee5f689df2b408d7 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Fri, 20 May 2022 17:42:58 -0700 Subject: [PATCH] [BOLT] Fix AND evaluation bug in shrink wrapping Fix a bug where shrink-wrapping would use wrong stack offsets because the stack was being aligned with an AND instruction, hence, making its true offsets only available during runtime (we can't statically determine where are the stack elements and we must give up on this case). Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D126110 --- bolt/include/bolt/Core/MCPlusBuilder.h | 22 ++++++-- .../bolt/Passes/StackPointerTracking.h | 4 +- bolt/lib/Passes/AllocCombiner.cpp | 6 +- bolt/lib/Passes/ShrinkWrapping.cpp | 4 +- bolt/lib/Passes/StackAllocationAnalysis.cpp | 2 +- bolt/lib/Passes/ValidateInternalCalls.cpp | 4 +- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 17 ++---- bolt/test/X86/shrinkwrapping-and-rsp.s | 55 +++++++++++++++++++ 8 files changed, 85 insertions(+), 29 deletions(-) create mode 100644 bolt/test/X86/shrinkwrapping-and-rsp.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 1b4159dd932d..d4f5c4202292 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -909,12 +909,22 @@ public: return false; } - /// Use \p Input1 or Input2 as the current value for the input register and - /// put in \p Output the changes incurred by executing \p Inst. Return false - /// if it was not possible to perform the evaluation. - virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const { + /// Use \p Input1 or Input2 as the current value for the input + /// register and put in \p Output the changes incurred by executing + /// \p Inst. Return false if it was not possible to perform the + /// evaluation. evaluateStackOffsetExpr is restricted to operations + /// that have associativity with addition. Its intended usage is for + /// evaluating stack offset changes. In these cases, expressions + /// appear in the form of (x + offset) OP constant, where x is an + /// unknown base (such as stack base) but offset and constant are + /// known. In these cases, \p Output represents the new stack offset + /// after executing \p Inst. Because we don't know x, we can't + /// evaluate operations such as multiply or AND/OR, e.g. (x + + /// offset) OP constant is not the same as x + (offset OP constant). + virtual bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const { llvm_unreachable("not implemented"); return false; } diff --git a/bolt/include/bolt/Passes/StackPointerTracking.h b/bolt/include/bolt/Passes/StackPointerTracking.h index 535977a2ba4a..cbb4f88302bc 100644 --- a/bolt/include/bolt/Passes/StackPointerTracking.h +++ b/bolt/include/bolt/Passes/StackPointerTracking.h @@ -115,7 +115,7 @@ protected: else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return SPVal; return SUPERPOSITION; @@ -150,7 +150,7 @@ protected: else SP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return FPVal; return SUPERPOSITION; diff --git a/bolt/lib/Passes/AllocCombiner.cpp b/bolt/lib/Passes/AllocCombiner.cpp index 652111cf34a9..e92b74e130ef 100644 --- a/bolt/lib/Passes/AllocCombiner.cpp +++ b/bolt/lib/Passes/AllocCombiner.cpp @@ -29,9 +29,9 @@ namespace { bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst, int64_t &Adjustment) { - return BC.MIB->evaluateSimple(Inst, Adjustment, - std::make_pair(BC.MIB->getStackPointer(), 0LL), - std::make_pair(0, 0LL)); + return BC.MIB->evaluateStackOffsetExpr( + Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL), + std::make_pair(0, 0LL)); } bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) { diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp index f150965a67e0..f032735f8981 100644 --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -246,7 +246,7 @@ void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // Not your regular frame pointer initialization... bail @@ -294,7 +294,7 @@ void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // If the value is the same of FP, no need to adjust it diff --git a/bolt/lib/Passes/StackAllocationAnalysis.cpp b/bolt/lib/Passes/StackAllocationAnalysis.cpp index 92ad2ccac854..25975537a12a 100644 --- a/bolt/lib/Passes/StackAllocationAnalysis.cpp +++ b/bolt/lib/Passes/StackAllocationAnalysis.cpp @@ -134,7 +134,7 @@ BitVector StackAllocationAnalysis::computeNext(const MCInst &Point, else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return Next; if (SPOffset < Output) { diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp index 7faff7676ba1..d1246a2abf28 100644 --- a/bolt/lib/Passes/ValidateInternalCalls.cpp +++ b/bolt/lib/Passes/ValidateInternalCalls.cpp @@ -261,8 +261,8 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const { int64_t Output; std::pair Input1 = std::make_pair(Reg, 0); std::pair Input2 = std::make_pair(0, 0); - if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) { - LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n"); + if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) { + LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n"); return false; } if (Offset + Output < 0 || diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 610eafc60971..d6debda76707 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -1244,9 +1244,10 @@ public: return false; } - bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const override { + bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const override { auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr { if (Reg == Input1.first) @@ -1260,16 +1261,6 @@ public: default: return false; - case X86::AND64ri32: - case X86::AND64ri8: - if (!Inst.getOperand(2).isImm()) - return false; - if (ErrorOr InputVal = - getOperandVal(Inst.getOperand(1).getReg())) - Output = *InputVal & Inst.getOperand(2).getImm(); - else - return false; - break; case X86::SUB64ri32: case X86::SUB64ri8: if (!Inst.getOperand(2).isImm()) diff --git a/bolt/test/X86/shrinkwrapping-and-rsp.s b/bolt/test/X86/shrinkwrapping-and-rsp.s new file mode 100644 index 000000000000..2228c6037e9c --- /dev/null +++ b/bolt/test/X86/shrinkwrapping-and-rsp.s @@ -0,0 +1,55 @@ +# This checks that shrink wrapping does attempt at accessing stack elements +# using RSP when the function is aligning RSP and changing offsets. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \ +# RUN: -frame-opt=all -simplify-conditional-tail-calls=false \ +# RUN: -eliminate-unreachable=false | FileCheck %s + +# Here we have a function that aligns the stack at prologue. Stack pointer +# analysis can't try to infer offset positions after AND because that depends +# on the runtime value of the stack pointer of callee (whether it is misaligned +# or not). + .globl _start + .type _start, %function +_start: + .cfi_startproc +# FDATA: 0 [unknown] 0 1 _start 0 0 1 + push %rbp + mov %rsp, %rbp + push %rbx + push %r14 + and $0xffffffffffffffe0,%rsp + subq $0x20, %rsp +b: je hot_path +# FDATA: 1 _start #b# 1 _start #hot_path# 0 1 +cold_path: + mov %r14, %rdi + mov %rbx, %rdi + # Block push-pop mode by using an instruction that requires the + # stack to be aligned to 128B. This will force the pass + # to try to index stack elements by using RSP +offset directly, but + # we do not know how to access individual elements of the stack thanks + # to the alignment. + movdqa %xmm8, (%rsp) + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret +hot_path: + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret + .cfi_endproc + .size _start, .-_start + +# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops