llvm-project/lld/COFF/InputFiles.h

205 lines
6.1 KiB
C
Raw Normal View History

//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_INPUT_FILES_H
#define LLD_COFF_INPUT_FILES_H
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <memory>
#include <set>
#include <vector>
namespace lld {
namespace coff {
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::MachineTypes;
using llvm::object::Archive;
using llvm::object::COFFObjectFile;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_section;
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
COFF: Change the order of adding symbols to the symbol table. Previously, the order of adding symbols to the symbol table was simple. We have a list of all input files. We read each file from beginning of the list and add all symbols in it to the symbol table. This patch changes that order. Now all archive files are added to the symbol table first, and then all the other object files are added. This shouldn't change the behavior in single-threading, and make room to parallelize in multi-threading. In the first step, only lazy symbols are added to the symbol table because archives contain only Lazy symbols. Member object files found to be necessary are queued. In the second step, defined and undefined symbols are added from object files. Adding an undefined symbol to the symbol table may cause more member files to be added to the queue. We simply continue reading all object files until the queue is empty. Finally, new archive or object files may be added to the queues by object files' directive sections (which contain new command line options). The above process is repeated until we get no new files. Symbols defined both in object files and in archives can make results undeterministic. If an archive is read before an object, a new member file gets linked, while in the other way, no new file would be added. That is the most popular cause of an undeterministic result or linking failure as I observed. Separating phases of adding lazy symbols and undefined symbols makes that deterministic. Adding symbols in each phase should be parallelizable. llvm-svn: 241107
2015-07-01 03:35:21 +08:00
class Lazy;
class SectionChunk;
struct Symbol;
class SymbolBody;
class Undefined;
// The root class of input files.
class InputFile {
public:
enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind };
Kind kind() const { return FileKind; }
virtual ~InputFile() {}
// Returns the filename.
StringRef getName() { return MB.getBufferIdentifier(); }
// Reads a file (the constructor doesn't do that).
virtual void parse() = 0;
// Returns the CPU type this file was compiled to.
virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
MemoryBufferRef MB;
// An archive file name if this file is created from an archive.
StringRef ParentName;
// Returns .drectve section contents if exist.
StringRef getDirectives() { return StringRef(Directives).trim(); }
protected:
InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
std::string Directives;
private:
const Kind FileKind;
};
// .lib or .a file.
class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M);
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
void parse() override;
// Enqueues an archive member load for the given symbol. If we've already
// enqueued a load for the same archive member, this function does nothing,
// which ensures that we don't load the same member more than once.
void addMember(const Archive::Symbol *Sym);
private:
std::unique_ptr<Archive> File;
std::string Filename;
llvm::DenseSet<uint64_t> Seen;
};
// .obj or .o file. This may be a member of an archive file.
class ObjectFile : public InputFile {
public:
explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
void parse() override;
MachineTypes getMachineType() override;
std::vector<Chunk *> &getChunks() { return Chunks; }
std::vector<SectionChunk *> &getDebugChunks() { return DebugChunks; }
std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; }
// Returns a SymbolBody object for the SymbolIndex'th symbol in the
// underlying object file.
SymbolBody *getSymbolBody(uint32_t SymbolIndex) {
return SparseSymbolBodies[SymbolIndex];
}
// Returns the underying COFF file.
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
// True if this object file is compatible with SEH.
// COFF-specific and x86-only.
bool SEHCompat = false;
// The list of safe exception handlers listed in .sxdata section.
// COFF-specific and x86-only.
std::set<SymbolBody *> SEHandlers;
private:
void initializeChunks();
void initializeSymbols();
void initializeSEH();
SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
SymbolBody *createUndefined(COFFSymbolRef Sym);
std::unique_ptr<COFFObjectFile> COFFObj;
llvm::BumpPtrAllocator Alloc;
const coff_section *SXData = nullptr;
// List of all chunks defined by this file. This includes both section
// chunks and non-section chunks for common symbols.
std::vector<Chunk *> Chunks;
// CodeView debug info sections.
std::vector<SectionChunk *> DebugChunks;
// This vector contains the same chunks as Chunks, but they are
// indexed such that you can get a SectionChunk by section index.
// Nonexistent section indices are filled with null pointers.
// (Because section number is 1-based, the first slot is always a
// null pointer.)
std::vector<Chunk *> SparseChunks;
// List of all symbols referenced or defined by this file.
std::vector<SymbolBody *> SymbolBodies;
// This vector contains the same symbols as SymbolBodies, but they
// are indexed such that you can get a SymbolBody by symbol
// index. Nonexistent indices (which are occupied by auxiliary
// symbols in the real symbol table) are filled with null pointers.
std::vector<SymbolBody *> SparseSymbolBodies;
};
// This type represents import library members that contain DLL names
// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
// for details about the format.
class ImportFile : public InputFile {
public:
explicit ImportFile(MemoryBufferRef M)
: InputFile(ImportKind, M), StringAlloc(StringAllocAux) {}
static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
DefinedImportData *ImpSym = nullptr;
DefinedImportThunk *ThunkSym = nullptr;
std::string DLLName;
private:
void parse() override;
llvm::BumpPtrAllocator StringAllocAux;
llvm::StringSaver StringAlloc;
public:
StringRef ExternalName;
const coff_import_header *Hdr;
Chunk *Location = nullptr;
};
// Used for LTO.
class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; }
MachineTypes getMachineType() override;
std::unique_ptr<llvm::lto::InputFile> Obj;
private:
void parse() override;
std::vector<SymbolBody *> SymbolBodies;
};
} // namespace coff
std::string toString(coff::InputFile *File);
} // namespace lld
#endif