[BOLT] Fix sign issue when validating X86 relocations

Summary:
In analyzeRelocations, we extract the result of the relocation
from binary code to recreate the target of it in a few special cases.
For R_X86_64_32S relocations, however, we were neglecting the
possibility of the encoded value in the instruction to be negative.

(cherry picked from FBD24096347)
This commit is contained in:
Rafael Auler 2020-10-05 12:41:03 -07:00 committed by Maksim Panchenko
parent 2808c800e8
commit d7fb998637
4 changed files with 58 additions and 10 deletions

View File

@ -111,9 +111,9 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents,
uint64_t PC) {
switch (Type) {
default:
llvm_unreachable("unsupported relocation type");
case ELF::R_AARCH64_ABS64:
return Contents;
case ELF::R_X86_64_32S:
return SignExtend64<32>(Contents & 0xffffffff);
case ELF::R_AARCH64_PREL32:
return static_cast<int64_t>(PC) + SignExtend64<32>(Contents & 0xffffffff);
case ELF::R_AARCH64_TLSDESC_CALL:

View File

@ -57,7 +57,8 @@ struct Relocation {
/// Extract current relocated value from binary contents. This is used for
/// RISC architectures where values are encoded in specific bits depending
/// on the relocation value.
/// on the relocation value. For X86, we limit to sign extending the value
/// if necessary.
static uint64_t extractValue(uint64_t Type, uint64_t Contents, uint64_t PC);
/// Return true if relocation type is PC-relative. Return false otherwise.

View File

@ -767,6 +767,7 @@ void RewriteInstance::run() {
<< Triple::getArchTypeName(
(llvm::Triple::ArchType)InputFile->getArch())
<< "\n";
outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
discoverStorage();
readSpecialSections();
@ -1777,13 +1778,9 @@ bool RewriteInstance::analyzeRelocation(const RelocationRef &Rel,
auto Value = BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
assert(Value && "failed to extract relocated value");
ExtractedValue = *Value;
if (IsAArch64) {
ExtractedValue = Relocation::extractValue(RType,
ExtractedValue,
Rel.getOffset());
}
ExtractedValue = Relocation::extractValue(RType,
*Value,
Rel.getOffset());
Addend = getRelocationAddend(InputFile, Rel);
const auto IsPCRelative = Relocation::isPCRelative(RType);

View File

@ -0,0 +1,50 @@
# This reproduces a bug triggered by a relocation referencing a section symbol
# plus a negative reloc. BOLT handles such cases specially, but while doing so,
# it was failing to sign extend a negative result for the relocation (encoded
# in the immediate operand of an LEA instruction).
# Originally triggered by https://fossies.org/linux/glib/glib/guniprop.c
# Line 550: const gchar *p = special_case_table + val - 0x1000000;
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# Delete our BB symbols so BOLT doesn't mark them as entry points
# RUN: strip --strip-unneeded %t.o
# RUN: %host_cc %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -relocs=1 -print-finalized -print-only=main -o %t.out
# RUN: %t.out 1 2
.text
.globl main
.type main, %function
.p2align 4
main:
pushq %rbp
movq %rsp, %rbp
subq $0x18, %rsp
cmpl $0x2, %edi
jb .BBend
.BB2:
leaq .data-0x1000000, %rsi # Use a large negative addend to cause a
# negative result to be encoded in LEA
addq $0x1000000, %rsi # Eventually program logic compensates to get
# a real address
movq $2, %rbx
xorq %rax, %rax
movb (%rsi), %al
addq %rbx, %rax
movb %al, (%rsi)
leaq mystring, %rdi
callq puts
.BBend:
xorq %rax, %rax
leaveq
retq
.size main, .-main
.data
mystring: .asciz "0 is rbx mod 10 contents in decimal\n"