[BOLT] Fix shrink wrapping to check pops

Summary:
Shrink wrapping has a mode where it will directly move push
pop pairs, instead of replacing them with stores/loads. This is an
ambitious mode that is triggered sometimes, but whenever matching with
a push, it would operate with the assumption that the restoring
instruction was a pop, not a load, otherwise it would assert. Fix this
assertion to bail nicely back to non-pushpop mode (use regular store and
load instructions).

(cherry picked from FBD20085905)
This commit is contained in:
Rafael Auler 2020-02-18 16:00:40 -08:00 committed by Maksim Panchenko
parent 2df4e7b99e
commit 340da8f294
6 changed files with 104 additions and 11 deletions

View File

@ -334,15 +334,15 @@ public:
void run() {
derived().preflight();
// Initialize state for all points of the function
for (auto &BB : Func) {
auto &St = getOrCreateStateAt(BB);
St = derived().getStartingStateAtBB(BB);
for (auto &Inst : BB) {
auto &St = getOrCreateStateAt(Inst);
St = derived().getStartingStateAtPoint(Inst);
}
// Initialize state for all points of the function
for (auto &BB : Func) {
auto &St = getOrCreateStateAt(BB);
St = derived().getStartingStateAtBB(BB);
for (auto &Inst : BB) {
auto &St = getOrCreateStateAt(Inst);
St = derived().getStartingStateAtPoint(Inst);
}
}
assert(Func.begin() != Func.end() && "Unexpected empty function");
std::queue<BinaryBasicBlock *> Worklist;

View File

@ -1065,6 +1065,13 @@ bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
return false;
}
}
// Abort if one of the restores for this CSR is not a POP.
for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
if (!BC.MIB->isPop(*Load)) {
DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
return false;
}
}
auto &SPT = Info.getStackPointerTracking();
// Abort if we are inserting a push into an entry BB (offset -8) and this

View File

@ -0,0 +1,60 @@
# This reproduces a bug with shrink wrapping when trying to move
# push-pops where restores were not validated to be POPs (and could be
# a regular load, which violated our assumptions). Check that we
# identify those cases.
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# Delete our BB symbols so BOLT doesn't mark them as entry points
# RUN: strip --strip-unneeded %t.o
# RUN: %host_cc %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -relocs=1 -frame-opt=all -print-finalized \
# RUN: -print-only=main -data %t.fdata -o %t.out | FileCheck %s
# RUN: %t.out
# CHECK: BOLT-INFO: Shrink wrapping moved 1 spills inserting load/stores and 0 spills inserting push/pops
.text
.globl main
.type main, %function
.p2align 4
main:
# FDATA: 0 [unknown] 0 1 main 0 0 510
pushq %rbp
movq %rsp, %rbp
pushq %rbx # We save rbx here, but there is an
# opportunity to move it to .LBB2
subq $0x18, %rsp
cmpl $0x2, %edi
.J1:
jb .BBend
# FDATA: 1 main #.J1# 1 main #.BB2# 0 10
# FDATA: 1 main #.J1# 1 main #.BBend# 0 500
.BB2:
movq $2, %rbx # Use rbx in a cold block
xorq %rax, %rax
movb mystring, %al
addq %rbx, %rax
movb %al, mystring
leaq mystring, %rdi
# Avoid making the actual call here to allow push-pop mode to operate
# without fear of an unknown function that may require aligned stack
# callq puts
.BBend:
mov -0x08(%rbp), %rbx # Original restore is here. Instead of a pop
# we use a load to reproduce gcc behavior while
# using leave in epilogue. Ordinarily it would
# addq $0x18, %rsp followed by pop %rbx
xorq %rax, %rax
leaveq
retq
.size main, .-main
.data
mystring: .asciz "0 is rbx mod 10 contents in decimal\n"

16
bolt/test/link_fdata.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/bash -e
grep -e '^# FDATA:' < "$1" | cut -c10- > "$3"
mapfile -t symbols < <(nm --defined-only "$2")
for line in "${symbols[@]}"; do
val=$(echo $line | cut -d' ' -f1)
symname=$(echo $line | cut -d' ' -f3)
if [ -z "$symname" ]; then
continue
fi
if [ -z "${val##*[!0-9a-fA-F]*}" ]; then
continue
fi
sed -i -e "s/\#${symname}\#/$val/g" $3
done

View File

@ -41,12 +41,21 @@ config.test_exec_root = os.path.join(config.bolt_obj_root, 'test')
llvm_config.use_default_substitutions()
tool_dirs = [config.llvm_tools_dir]
config.substitutions.append(('%host_cc', config.host_cc))
tool_dirs = [config.llvm_tools_dir,
config.test_source_root]
linker_tool = llvm_config.use_llvm_tool('ld', required=True)
tools = [
ToolSubst('llvm-bolt', unresolved='fatal'),
ToolSubst('perf2bolt', unresolved='fatal'),
ToolSubst('yaml2obj', unresolved='fatal'),
ToolSubst('llvm-mc', unresolved='fatal'),
ToolSubst('linker', command=linker_tool, unresolved='fatal'),
ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'),
]
llvm_config.add_tool_substitutions([], tool_dirs)
llvm_config.with_environment('PATH', tool_dirs, append_path=True)
llvm_config.add_tool_substitutions(tools, tool_dirs)
# Propagate path to symbolizer for ASan/MSan.
llvm_config.with_system_environment(

View File

@ -12,6 +12,7 @@ config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@"
config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
config.host_triple = "@LLVM_HOST_TRIPLE@"
config.target_triple = "@TARGET_TRIPLE@"
config.host_cc = "@HOST_CC@"
config.host_cxx = "@CMAKE_CXX_COMPILER@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.enable_shared = @ENABLE_SHARED@