diff --git a/lld/lib/ReaderWriter/PECOFF/Atoms.h b/lld/lib/ReaderWriter/PECOFF/Atoms.h index 12affa9881fe..d33d99b5eb44 100644 --- a/lld/lib/ReaderWriter/PECOFF/Atoms.h +++ b/lld/lib/ReaderWriter/PECOFF/Atoms.h @@ -235,28 +235,22 @@ private: class COFFSharedLibraryAtom : public SharedLibraryAtom { public: - enum class Kind { - DATA, FUNC - }; + COFFSharedLibraryAtom(const File &file, StringRef symbolName, + StringRef originalName, StringRef loadName) + : _file(file), _symbolName(symbolName), _loadName(loadName), + _originalName(originalName) {} virtual const File &file() const { return _file; } virtual StringRef name() const { return _symbolName; } virtual StringRef loadName() const { return _loadName; } virtual bool canBeNullAtRuntime() const { return false; } - - Kind getKind() const { return _kind; } - -protected: - COFFSharedLibraryAtom(const File &file, StringRef symbolName, - StringRef loadName, Kind kind) - : _file(file), _symbolName(symbolName), _loadName(loadName), _kind(kind) { - } + virtual StringRef originalName() const { return _originalName; } private: const File &_file; StringRef _symbolName; StringRef _loadName; - Kind _kind; + StringRef _originalName; }; //===----------------------------------------------------------------------===// diff --git a/lld/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h b/lld/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h index d29c71b5fc77..4ef48c634b00 100644 --- a/lld/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h +++ b/lld/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h @@ -45,6 +45,7 @@ #include #include +using lld::coff::COFFBaseDefinedAtom; using lld::coff::COFFDefinedAtom; namespace lld { @@ -76,8 +77,8 @@ private: SectionToAtomsT filterHeadAtoms(MutableFile &mutableFile) const { SectionToAtomsT result; for (const DefinedAtom *atom : mutableFile.defined()) { - auto *coffAtom = (COFFDefinedAtom *)atom; - if (coffAtom->ordinal() == 0) + auto *coffAtom = dyn_cast((COFFBaseDefinedAtom *)atom); + if (coffAtom && 
coffAtom->ordinal() == 0) result[coffAtom->getSectionName()].push_back(coffAtom); } return std::move(result); diff --git a/lld/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp b/lld/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp index 200c7bbc6ff9..e494be991458 100644 --- a/lld/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp +++ b/lld/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// /// -/// \file \brief This file provides a way to read an import library -/// member in a .lib file. +/// \file \brief This file provides a way to read an import library member in a +/// .lib file. +/// +/// Archive Files in Windows +/// ======================== /// /// In Windows, archive files with .lib file extension serve two different /// purposes. @@ -17,19 +20,90 @@ /// normal .obj files and is used for static linking. This is the same /// usage as .a file in Unix. /// -/// - For dynamic linking: An archive file in this case contains pseudo .obj -/// files to describe exported symbols of a DLL. Each .obj file in an archive -/// has a name of an exported symbol and a DLL filename from which the symbol -/// can be imported. When you link a DLL on Windows, you pass the name of the -/// .lib file for the DLL instead of the DLL filename itself. That is the -/// Windows way of linking a shared library. +/// - For dynamic linking: An archive file in this use case contains pseudo +/// .obj files to describe exported symbols of a DLL. Each pseudo .obj file +/// in an archive has a name of an exported symbol and a DLL filename from +/// which the symbol can be imported. When you link a DLL on Windows, you +/// pass the name of the .lib file for the DLL instead of the DLL filename +/// itself. That is the Windows way of linking against a shared library. /// -/// This file contains a function to parse the pseudo object file. +/// This file contains a function to handle the pseudo object file. 
+/// +/// Windows Loader and Import Address Table +/// ======================================= +/// +/// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs +/// contains a list of DLL names and list of symbols that need to be resolved by +/// the loader. Windows loader maps the executable and all the DLLs to memory, +/// resolves the symbols referencing items in DLLs, and updates the import +/// address table in memory. The import address table is an array of pointers to +/// all of the data or functions in DLL referenced by the executable. You cannot +/// access items in DLLs directly. They have to be accessed through an extra +/// level of indirection. +/// +/// So, if you want to access an item in DLL, you have to go through a +/// pointer. How do you actually do that? For each symbol in DLL, there is +/// another set of symbols with "_imp__" prefix. For example, if you have a +/// global variable "foo" in a DLL, a pointer to the variable is exported from +/// the DLL as "_imp__foo". You cannot directly use "foo" but need to go through +/// "_imp__foo", because symbol "foo" is not exported. +/// +/// Is this OK? That's not that complicated. Because items in a DLL are not +/// directly accessible, you need to access through a pointer, and the pointer +/// is available as a symbol with "_imp__" prefix. +/// +/// Trick 1: Although you can write code with "_imp__" prefix, today's compiler +/// and linker let you write code as if there's no extra level of +/// indirection. That's why you haven't seen lots of _imp__ in your code. A +/// variable or a function declared with "dllimport" attributes is treated as an +/// item in a DLL, and the compiler automatically mangles its name and inserts +/// the extra level of indirection when accessing the item. 
Here are some +/// examples: +/// +/// __declspec(dllimport) int var_in_dll; +/// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3; +/// +/// __declspec(dllimport) int fn_in_dll(void); +/// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)(); +/// +/// The compiler simply rewrites the code for you so that you don't need to +/// handle the indirection yourself. +/// +/// Trick 2: __declspec(dllimport) is mandatory for data but optional for +/// functions. For a function, the linker creates a jump table with the original +/// symbol name, so that the function is accessible without the "_imp__" prefix. The +/// same function in a DLL can be called through two different symbols if it's +/// not dllimport'ed. +/// +/// (*_imp__fn)() +/// fn() +/// +/// The above functions do the same thing. fn's content is a JMP instruction to +/// branch to the address pointed to by _imp__fn. The latter may be a little bit +/// slower than the former because it will execute the extra JMP instruction, but +/// that's not an important point here. +/// +/// If a function is dllimport'ed, which is usually done in a header file, +/// the mangled name will be used at compile time so the jump table will not be +/// used. +/// +/// Because there's no way to hide the indirection for data access at link time, +/// data has to be accessed through dllimport'ed symbols or an explicit "_imp__" +/// prefix. +/// +/// Creating Atoms for the Import Address Table +/// =========================================== +/// +/// The code in this file reads a pseudo object file and creates at most two atoms. One +/// is a shared library atom for the "_imp__" symbol. The other is a defined atom +/// for the JMP instruction if the symbol is for a function.
/// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ReaderImportHeader" +#include "Atoms.h" + #include "lld/Core/File.h" #include "lld/Core/Error.h" #include "lld/Core/SharedLibraryAtom.h" @@ -58,20 +132,25 @@ namespace coff { namespace { -class COFFDynamicAtom : public SharedLibraryAtom { +/// The defined atom for jump table. +class FuncAtom : public COFFBaseDefinedAtom { public: - COFFDynamicAtom(File &file, StringRef symbolName, StringRef dllName) - : _owningFile(file), _symbolName(symbolName), _dllName(dllName) {} + FuncAtom(const File &file, StringRef symbolName) + : COFFBaseDefinedAtom(file, symbolName, &rawContent) {} - virtual const File &file() const { return _owningFile; } - virtual StringRef name() const { return _symbolName; } - virtual StringRef loadName() const { return _dllName; } - virtual bool canBeNullAtRuntime() const { return true; } + virtual uint64_t ordinal() const { return 0; } + virtual Scope scope() const { return scopeGlobal; } + virtual ContentType contentType() const { return typeCode; } + virtual Alignment alignment() const { return Alignment(1); } + virtual ContentPermissions permissions() const { return permR_X; } private: - const File &_owningFile; - StringRef _symbolName; - StringRef _dllName; + static std::vector rawContent; +}; + +std::vector FuncAtom::rawContent = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *0x0 + 0x90, 0x90, // nop; nop }; class FileImportLibrary : public File { @@ -96,14 +175,17 @@ public: StringRef symbolName(buf + 20); StringRef dllName(buf + 20 + symbolName.size() + 1); - auto *atom = new (allocator.Allocate()) - COFFDynamicAtom(*this, symbolName, dllName); - _sharedLibraryAtoms._atoms.push_back(atom); + const COFFSharedLibraryAtom *dataAtom = addSharedLibraryAtom(symbolName, + dllName); + int type = *reinterpret_cast(buf + 18) >> 16; + if (type == llvm::COFF::IMPORT_CODE) + addDefinedAtom(symbolName, dllName, dataAtom); + ec = error_code::success(); } 
virtual const atom_collection &defined() const { - return _noDefinedAtoms; + return _definedAtoms; } virtual const atom_collection &undefined() const { @@ -121,6 +203,28 @@ public: virtual const TargetInfo &getTargetInfo() const { return _targetInfo; } private: + const COFFSharedLibraryAtom *addSharedLibraryAtom(StringRef symbolName, + StringRef dllName) { + auto *name = new (allocator.Allocate()) std::string("__imp_"); + name->append(symbolName); + auto *atom = new (allocator.Allocate()) + COFFSharedLibraryAtom(*this, *name, symbolName, dllName); + _sharedLibraryAtoms._atoms.push_back(atom); + return atom; + } + + void addDefinedAtom(StringRef symbolName, StringRef dllName, + const COFFSharedLibraryAtom *dataAtom) { + auto *atom = new (allocator.Allocate()) + FuncAtom(*this, symbolName); + + // The first two bytes of the atom are the JMP opcode, so the operand to relocate starts at offset 2. + atom->addReference(std::unique_ptr( + new COFFReference(dataAtom, 2, llvm::COFF::IMAGE_REL_I386_DIR32))); + _definedAtoms._atoms.push_back(atom); + } + + atom_collection_vector _definedAtoms; atom_collection_vector _sharedLibraryAtoms; const TargetInfo &_targetInfo; mutable llvm::BumpPtrAllocator allocator; diff --git a/lld/test/pecoff/importlib.test b/lld/test/pecoff/importlib.test index 84b495161efa..847ac95832a3 100644 --- a/lld/test/pecoff/importlib.test +++ b/lld/test/pecoff/importlib.test @@ -7,6 +7,12 @@ CHECK: Disassembly of section .text: CHECK: .text: -CHECK: 1000: a1 00 00 40 00 -CHECK: 1005: 03 05 00 00 40 00 -CHECK: 100b: c3 +CHECK: 1000: a1 0c 10 40 00 movl 4198412, %eax +CHECK: 1005: 03 05 14 10 40 00 addl 4198420, %eax +CHECK: 100b: c3 ret +CHECK: 100c: ff 25 00 00 40 00 jmpl *4194304 +CHECK: 1012: 90 nop +CHECK: 1013: 90 nop +CHECK: 1014: ff 25 00 00 40 00 jmpl *4194304 +CHECK: 101a: 90 nop +CHECK: 101b: 90 nop