[lld-macho][re-land] Initial support for common symbols

Fix earlier build break via a static_cast.

This reverts commit 8112d494d3.

Differential Revision: https://reviews.llvm.org/D86909
This commit is contained in:
Jez Ng 2020-09-24 14:44:14 -07:00
parent dfc5a9eb57
commit c32e69b2ce
7 changed files with 177 additions and 5 deletions

View File

@ -436,6 +436,34 @@ static bool markSubLibrary(StringRef searchName) {
return false;
}
// Replaces common symbols with defined symbols residing in __common sections.
// This function must be called after all symbol names are resolved (i.e. after
// all InputFiles have been loaded.) As a result, later operations won't see
// any CommonSymbols.
static void replaceCommonSymbols() {
for (macho::Symbol *sym : symtab->getSymbols()) {
auto *common = dyn_cast<CommonSymbol>(sym);
if (common == nullptr)
continue;
auto *isec = make<InputSection>();
isec->file = common->file;
isec->name = section_names::common;
isec->segname = segment_names::data;
isec->align = common->align;
// Casting to size_t will truncate large values on 32-bit architectures,
// but it's not really worth supporting the linking of 64-bit programs on
// 32-bit archs.
isec->data = {nullptr, static_cast<size_t>(common->size)};
isec->flags = S_ZEROFILL;
inputSections.push_back(isec);
replaceSymbol<Defined>(sym, sym->getName(), isec, /*value=*/0,
/*isWeakDef=*/false,
/*isExternal=*/true);
}
}
static inline char toLowerDash(char x) {
if (x >= 'A' && x <= 'Z')
return x - 'A' + 'a';
@ -641,6 +669,8 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
error("-sub_library " + searchName + " does not match a supplied dylib");
}
replaceCommonSymbols();
StringRef orderFile = args.getLastArgValue(OPT_order_file);
if (!orderFile.empty())
parseOrderFile(orderFile);

View File

@ -243,10 +243,12 @@ void InputFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
for (size_t i = 0, n = nList.size(); i < n; ++i) {
const structs::nlist_64 &sym = nList[i];
// Undefined symbol
if (!sym.n_sect) {
if ((sym.n_type & N_TYPE) == N_UNDF) {
StringRef name = strtab + sym.n_strx;
symbols[i] = symtab->addUndefined(name);
symbols[i] = sym.n_value == 0
? symtab->addUndefined(name)
: symtab->addCommon(name, this, sym.n_value,
1 << GET_COMM_ALIGN(sym.n_desc));
continue;
}

View File

@ -74,6 +74,26 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
return s;
}
Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
uint32_t align) {
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insert(name);
if (!wasInserted) {
if (auto *common = dyn_cast<CommonSymbol>(s)) {
if (size < common->size)
return s;
} else if (!isa<Undefined>(s)) {
error("TODO: implement common symbol resolution with other symbol kinds");
return s;
}
}
replaceSymbol<CommonSymbol>(s, name, file, size, align);
return s;
}
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool isTlv) {
Symbol *s;

View File

@ -19,6 +19,7 @@ namespace macho {
class ArchiveFile;
class DylibFile;
class InputFile;
class InputSection;
class MachHeaderSection;
class Symbol;
@ -36,6 +37,8 @@ public:
Symbol *addUndefined(StringRef name);
Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align);
Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
Symbol *addLazy(StringRef name, ArchiveFile *file,

View File

@ -14,6 +14,7 @@
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"
namespace lld {
namespace macho {
@ -36,6 +37,7 @@ public:
enum Kind {
DefinedKind,
UndefinedKind,
CommonKind,
DylibKind,
LazyKind,
DSOHandleKind,
@ -117,6 +119,35 @@ public:
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
};
// On Unix, it is traditionally allowed to write variable definitions without
// initialization expressions (such as "int foo;") to header files. These are
// called tentative definitions.
//
// Using tentative definitions is usually considered a bad practice; you should
// write only declarations (such as "extern int foo;") to header files.
// Nevertheless, the linker and the compiler have to do something to support
// bad code by allowing duplicate definitions for this particular case.
//
// The compiler creates common symbols when it sees tentative definitions.
// (You can suppress this behavior and let the compiler create a regular
// defined symbol by passing -fno-common.) When linking the final binary, if
// there are remaining common symbols after name resolution is complete, the
// linker converts them to regular defined symbols in a __common section.
class CommonSymbol : public Symbol {
public:
CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align)
: Symbol(CommonKind, name), file(file), size(size),
align(align != 1 ? align : llvm::PowerOf2Ceil(size)) {
// TODO: cap maximum alignment
}
static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
InputFile *const file;
const uint64_t size;
const uint32_t align;
};
class DylibSymbol : public Symbol {
public:
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
@ -185,8 +216,10 @@ public:
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
alignas(LazySymbol) char d[sizeof(LazySymbol)];
alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
alignas(LazySymbol) char e[sizeof(LazySymbol)];
alignas(DSOHandle) char f[sizeof(DSOHandle)];
};
template <typename T, typename... ArgT>

View File

@ -26,6 +26,7 @@ namespace macho {
namespace section_names {
constexpr const char pageZero[] = "__pagezero";
constexpr const char common[] = "__common";
constexpr const char header[] = "__mach_header";
constexpr const char binding[] = "__binding";
constexpr const char weakBinding[] = "__weak_binding";

View File

@ -0,0 +1,83 @@
# REQUIRES: x86
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/same-size.s -o %t/same-size.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/smaller-size.s -o %t/smaller-size.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align.s -o %t/zero-align.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align-round-up.s -o %t/zero-align-round-up.o
## Check that we pick the definition with the larger size, regardless of
## its alignment.
# RUN: lld -flavor darwinnew %t/test.o %t/smaller-size.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/smaller-size.o %t/test.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
## When the sizes are equal, we pick the symbol whose file occurs later in the
## command-line argument list.
# RUN: lld -flavor darwinnew %t/test.o %t/same-size.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/same-size.o %t/test.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/test.o %t/zero-align.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/zero-align.o %t/test.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/test.o %t/zero-align-round-up.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
# RUN: lld -flavor darwinnew %t/zero-align-round-up.o %t/test.o -order_file %t/order -o %t/test
# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
# SMALLER-ALIGNMENT-LABEL: Sections:
# SMALLER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS
# SMALLER-ALIGNMENT-LABEL: SYMBOL TABLE:
# SMALLER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size
# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker
# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 8]] g O __DATA,__common _check_alignment
# LARGER-ALIGNMENT-LABEL: Sections:
# LARGER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS
# LARGER-ALIGNMENT-LABEL: SYMBOL TABLE:
# LARGER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size
# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker
# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 16]] g O __DATA,__common _check_alignment
#--- order
## Order is important as we determine the size of a given symbol via the
## address of the next symbol.
_check_size
_end_marker
_check_alignment
#--- smaller-size.s
.comm _check_size, 1, 1
.comm _check_alignment, 1, 4
#--- same-size.s
.comm _check_size, 2, 1
.comm _check_alignment, 2, 4
#--- zero-align.s
.comm _check_size, 2, 1
## If alignment is set to zero, use the size to determine the alignment.
.comm _check_alignment, 16, 0
#--- zero-align-round-up.s
.comm _check_size, 2, 1
## If alignment is set to zero, use the size to determine the alignment. If the
## size is not a power of two, round it up. (In this case, 14 rounds to 16.)
.comm _check_alignment, 14, 0
#--- test.s
.comm _check_size, 2, 1
.comm _end_marker, 1
.comm _check_alignment, 2, 3
.globl _main
_main:
ret