[PATCH] Speculatively instantiate archive members

LLD initially parses only the index table of an archive file. When it finds
that a symbol it is looking for is defined in a member of the archive, it
then reads that member from the archive file. That's done in the core linker.

That's a single-threaded process since the core linker is single-threaded.
If your command line contains a few object files and a lot of archive files
(which is quite often the case), LLD hardly utilizes hardware parallelism.

This patch improves parallelism by speculatively instantiating archive
file members. At the beginning of core linking, we first create a map
containing all symbols defined in all archive members, and each time we
find a new undefined symbol, we instantiate the member file containing the
symbol (if such a file exists). File instantiation is side-effect free, so
this should not affect correctness.

This is a quick benchmark result. Time to self-link the LLD executable:

Linux   9.78s -> 8.50s (0.86x)
Windows 6.18s -> 4.51s (0.73x)

http://reviews.llvm.org/D7015

llvm-svn: 226336
This commit is contained in:
Rui Ueyama 2015-01-16 22:44:50 +00:00
parent 0e69c38d5d
commit 3a8d7e2f10
6 changed files with 106 additions and 8 deletions

View File

@ -11,6 +11,7 @@
#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H
#include "lld/Core/File.h"
#include "lld/Core/Parallel.h"
#include <set>
namespace lld {
@ -37,9 +38,15 @@ public:
virtual std::error_code
parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0;
// Hints that the member file defining symbolName may be needed soon, so
// that an implementation can parse it ahead of time (e.g. on the given
// task group) and a later find() can return it immediately. Calling this
// function has no side effect other than pre-instantiating a file, and
// calling it or not doesn't affect correctness.
// The default implementation does nothing.
virtual void preload(TaskGroup &group, StringRef symbolName) {}
/// Returns a set of all defined symbols in the archive, i.e. all
/// resolvable symbol using this file.
virtual std::set<StringRef> getDefinedSymbols() const {
virtual std::set<StringRef> getDefinedSymbols() {
return std::set<StringRef>();
}

View File

@ -164,6 +164,14 @@ public:
std::error_code parse();
// Hook called just before the core linker tries to use a file.
// Currently the PECOFF reader uses this to trigger the driver to parse
// the .drectve section (which contains command line options).
// Anything that has side effects belongs in this hook rather than in
// doParse(), because a file could be pre-loaded speculatively.
// The default implementation does nothing.
virtual void beforeLink() {}
// Usually each file owns a std::unique_ptr<MemoryBuffer>.
// However, there's one special case. If a file is an archive file,
// the archive file and its children all shares the same memory buffer.

View File

@ -10,6 +10,7 @@
#ifndef LLD_CORE_RESOLVER_H
#define LLD_CORE_RESOLVER_H
#include "lld/Core/ArchiveLibraryFile.h"
#include "lld/Core/File.h"
#include "lld/Core/SharedLibraryFile.h"
#include "lld/Core/Simple.h"
@ -63,6 +64,7 @@ private:
void maybeAddSectionGroupOrGnuLinkOnce(const DefinedAtom &atom);
/// \brief Builds the symbol name -> archive file map that is consulted
/// when preloading archive members.
void makePreloadArchiveMap();
/// \brief The main function that iterates over the files to resolve
bool resolveUndefines();
void updateReferences();
void deadStripOptimize();
@ -73,6 +75,7 @@ private:
void markLive(const Atom *atom);
void addAtoms(const std::vector<const DefinedAtom *>&);
// Asks the archive that defines sym (if the preload map has one) to start
// instantiating the corresponding member file.
void maybePreloadArchiveMember(StringRef sym);
class MergedFile : public SimpleFile {
public:
@ -93,6 +96,9 @@ private:
std::vector<File *> _files;
std::map<File *, bool> _newUndefinesAdded;
size_t _fileIndex;
// Preloading: maps a symbol name to the archive file whose preload() is
// called when that symbol shows up as a new undefined symbol. Filled in by
// makePreloadArchiveMap().
std::map<StringRef, ArchiveLibraryFile *> _archiveMap;
};
} // namespace lld

View File

@ -31,9 +31,12 @@ bool Resolver::handleFile(const File &file) {
bool undefAdded = false;
for (const DefinedAtom *atom : file.defined())
doDefinedAtom(*atom);
for (const UndefinedAtom *atom : file.undefined())
if (doUndefinedAtom(*atom))
for (const UndefinedAtom *atom : file.undefined()) {
if (doUndefinedAtom(*atom)) {
undefAdded = true;
maybePreloadArchiveMember(atom->name());
}
}
for (const SharedLibraryAtom *atom : file.sharedLibrary())
doSharedLibraryAtom(*atom);
for (const AbsoluteAtom *atom : file.absolute())
@ -229,6 +232,17 @@ void Resolver::addAtoms(const std::vector<const DefinedAtom *> &newAtoms) {
doDefinedAtom(*newAtom);
}
// Kick off speculative instantiation of the archive member that defines
// the given symbol. The symbol is looked up in the preload map; if no
// archive is registered for it this is a no-op, otherwise the archive is
// asked to preload the member on the context's task group.
void Resolver::maybePreloadArchiveMember(StringRef sym) {
  const auto entry = _archiveMap.find(sym);
  if (entry != _archiveMap.end())
    entry->second->preload(_context.getTaskGroup(), sym);
}
// Returns true if at least one of N previous files has created an
// undefined symbol.
bool Resolver::undefinesAdded(int begin, int end) {
@ -261,6 +275,16 @@ File *Resolver::getFile(int &index, int &groupLevel) {
return cast<FileNode>(inputs[index++].get())->getFile();
}
// Make a map of Symbol -> ArchiveFile.
void Resolver::makePreloadArchiveMap() {
std::vector<std::unique_ptr<Node>> &nodes = _context.getNodes();
for (auto it = nodes.rbegin(), e = nodes.rend(); it != e; ++it)
if (auto *fnode = dyn_cast<FileNode>(it->get()))
if (auto *archive = dyn_cast<ArchiveLibraryFile>(fnode->getFile()))
for (StringRef sym : archive->getDefinedSymbols())
_archiveMap[sym] = archive;
}
// Keep adding atoms until _context.getNextFile() returns an error. This
// function is where undefined atoms are resolved.
bool Resolver::resolveUndefines() {
@ -277,6 +301,7 @@ bool Resolver::resolveUndefines() {
<< ": " << ec.message() << "\n";
return false;
}
file->beforeLink();
switch (file->kind()) {
case File::kindObject:
if (groupLevel > 0)
@ -446,6 +471,7 @@ void Resolver::removeCoalescedAwayAtoms() {
}
bool Resolver::resolve() {
makePreloadArchiveMap();
if (!resolveUndefines())
return false;
updateReferences();

View File

@ -17,7 +17,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <future>
#include <memory>
#include <mutex>
#include <set>
#include <unordered_map>
@ -57,6 +59,17 @@ public:
return nullptr;
_membersInstantiated.insert(memberStart);
// Check if a file is preloaded.
{
std::lock_guard<std::mutex> lock(_mutex);
auto it = _preloaded.find(memberStart);
if (it != _preloaded.end()) {
std::future<const File *> &future = it->second;
return future.get();
}
}
std::unique_ptr<File> result;
if (instantiateMember(ci, result))
return nullptr;
@ -65,6 +78,37 @@ public:
return result.release();
}
// Speculatively instantiate the member file that defines a given symbol.
//
// Does nothing if the archive has no member for the symbol, if the member
// has already been instantiated, or if a preload for it has already been
// started. Otherwise a task is spawned on the given task group which
// instantiates the member and publishes the result (nullptr on failure)
// through a future stored in _preloaded, where find() picks it up.
void preload(TaskGroup &group, StringRef name) override {
  auto member = _symbolMemberMap.find(name);
  if (member == _symbolMemberMap.end())
    return;
  Archive::child_iterator ci = member->second;

  // Do nothing if a member is already instantiated.
  const char *memberStart = ci->getBuffer().data();
  if (_membersInstantiated.count(memberStart))
    return;

  std::lock_guard<std::mutex> lock(_mutex);
  if (_preloaded.count(memberStart))
    return;

  // Give the promise an owner right away so that it can never be leaked,
  // then hand that owner over to _promises. The spawned task only keeps a
  // non-owning pointer to it.
  std::unique_ptr<std::promise<const File *>> owner(
      new std::promise<const File *>);
  std::promise<const File *> *promise = owner.get();
  _preloaded[memberStart] = promise->get_future();
  _promises.push_back(std::move(owner));

  // Instantiate the member. The captures are spelled out because the task
  // outlives this call: it uses this object, a copy of the child iterator
  // and the promise owned by _promises.
  group.spawn([this, ci, promise] {
    std::unique_ptr<File> result;
    if (instantiateMember(ci, result)) {
      promise->set_value(nullptr);
      return;
    }
    promise->set_value(result.release());
  });
}
/// \brief parse each member
std::error_code
parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
@ -117,7 +161,8 @@ public:
}
/// Returns a set of all defined symbols in the archive.
std::set<StringRef> getDefinedSymbols() const override {
std::set<StringRef> getDefinedSymbols() override {
parse();
std::set<StringRef> ret;
for (const auto &e : _symbolMemberMap)
ret.insert(e.first);
@ -225,6 +270,9 @@ private:
atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
bool _logLoading;
mutable std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers;
// Members whose instantiation has been started by preload(), keyed by the
// start address of the member's buffer. find() looks a member up here and,
// if present, returns the value of its future.
mutable std::map<const char *, std::future<const File *>> _preloaded;
// Owns the promises behind the futures stored in _preloaded; the tasks
// spawned by preload() refer to them through raw pointers.
mutable std::vector<std::unique_ptr<std::promise<const File *>>> _promises;
// Held by find() and preload() while they access _preloaded; preload()
// also appends to _promises under it.
mutable std::mutex _mutex;
};
class ArchiveReader : public Reader {

View File

@ -106,6 +106,8 @@ public:
return _absoluteAtoms;
}
void beforeLink() override;
void addDefinedAtom(AliasAtom *atom) {
atom->setOrdinal(_ordinal++);
_definedAtoms._atoms.push_back(atom);
@ -382,7 +384,10 @@ std::error_code FileCOFF::doParse() {
// The mapping for /alternatename is in the context object. This helper
// function iterate over defined atoms and create alias atoms if needed.
createAlternateNameAtoms();
return std::error_code();
}
void FileCOFF::beforeLink() {
// Acquire the mutex to mutate _ctx.
std::lock_guard<std::recursive_mutex> lock(_ctx.getMutex());
@ -392,10 +397,8 @@ std::error_code FileCOFF::doParse() {
_ctx.setSafeSEH(false);
if (_ctx.deadStrip())
for (StringRef sym : undefinedSymbols)
_ctx.addDeadStripRoot(sym);
return std::error_code();
for (const UndefinedAtom *undef : undefined())
_ctx.addDeadStripRoot(undef->name());
}
/// Iterate over the symbol table to retrieve all symbols.