llvm-project/lld/COFF/ModuleDef.cpp

292 lines
6.6 KiB
C++
Raw Normal View History

//===- COFF/ModuleDef.cpp -------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
// Parsed results are directly written to Config global variable.
//
// The format of module-definition files are described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
//===----------------------------------------------------------------------===//
#include "Config.h"
#include "Error.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using namespace llvm;
namespace lld {
namespace coff {
namespace {
enum Kind {
Unknown,
Eof,
Identifier,
Comma,
Equal,
KwBase,
KwData,
KwExports,
KwHeapsize,
KwLibrary,
KwName,
KwNoname,
KwPrivate,
KwStacksize,
KwVersion,
};
struct Token {
explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
Kind K;
StringRef Value;
};
COFF: Improve dllexported name mangling compatibility. The rules for dllexported symbols are overly complicated due to x86 name decoration, fuzzy symbol resolution, and the fact that one symbol can be resolved by so many different names. The rules are probably intended to be "intuitive", so that users don't have to understand the name mangling schemes, but it seems that it can lead to unintended symbol exports. To make it clear what I'm trying to do with this patch, let me write how the export rules are subtle and complicated. - x86 name decoration: If machine type is i386 and export name is given by a command line option, like /export:foo, the real symbol name the linker has to search for is _foo because all symbols are decorated with "_" prefixes. This doesn't happen on non-x86 machines. This automatic name decoration happens only when the name is not C++ mangled. However, the symbol name exported from DLLs are ones without "_" on all platforms. Moreover, if the option is given via .drectve section, no symbol decoration is done (the reason being that the .drectve section is created by a compiler and the compiler should always know the exact name of the symbol, I guess). - Fuzzy symbol resolution: In addition to x86 name decoration, the linker has to look for cdecl or C++ mangled symbols for a given /export. For example, it searches for not only _foo but also _foo@<number> or ??foo@... for /export:foo. Previous implementation didn't get it right. I'm trying to make it as compatible with MSVC linker as possible with this patch however the rules are. The new code looks a bit messy to me, but I don't think it can be simpler due to the ad-hoc-ness of the rules. llvm-svn: 246424
2015-08-31 16:43:21 +08:00
static bool isDecorated(StringRef Sym) {
return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?");
}
class Lexer {
public:
explicit Lexer(StringRef S) : Buf(S) {}
Token lex() {
Buf = Buf.trim();
if (Buf.empty())
return Token(Eof);
switch (Buf[0]) {
case '\0':
return Token(Eof);
case ';': {
size_t End = Buf.find('\n');
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return lex();
}
case '=':
Buf = Buf.drop_front();
return Token(Equal, "=");
case ',':
Buf = Buf.drop_front();
return Token(Comma, ",");
case '"': {
StringRef S;
std::tie(S, Buf) = Buf.substr(1).split('"');
return Token(Identifier, S);
}
default: {
size_t End = Buf.find_first_of("=,\r\n \t\v");
StringRef Word = Buf.substr(0, End);
Kind K = llvm::StringSwitch<Kind>(Word)
.Case("BASE", KwBase)
.Case("DATA", KwData)
.Case("EXPORTS", KwExports)
.Case("HEAPSIZE", KwHeapsize)
.Case("LIBRARY", KwLibrary)
.Case("NAME", KwName)
.Case("NONAME", KwNoname)
.Case("PRIVATE", KwPrivate)
.Case("STACKSIZE", KwStacksize)
.Case("VERSION", KwVersion)
.Default(Identifier);
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return Token(K, Word);
}
}
}
private:
StringRef Buf;
};
class Parser {
public:
explicit Parser(StringRef S, StringSaver *A) : Lex(S), Alloc(A) {}
void parse() {
do {
parseOne();
} while (Tok.K != Eof);
}
private:
void read() {
if (Stack.empty()) {
Tok = Lex.lex();
return;
}
Tok = Stack.back();
Stack.pop_back();
}
void readAsInt(uint64_t *I) {
read();
if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
error("integer expected");
}
void expect(Kind Expected, StringRef Msg) {
read();
if (Tok.K != Expected)
error(Msg);
}
void unget() { Stack.push_back(Tok); }
void parseOne() {
read();
switch (Tok.K) {
case Eof:
return;
case KwExports:
for (;;) {
read();
if (Tok.K != Identifier) {
unget();
return;
}
parseExport();
}
case KwHeapsize:
parseNumbers(&Config->HeapReserve, &Config->HeapCommit);
return;
case KwLibrary:
parseName(&Config->OutputFile, &Config->ImageBase);
if (!StringRef(Config->OutputFile).endswith_lower(".dll"))
Config->OutputFile += ".dll";
return;
case KwStacksize:
parseNumbers(&Config->StackReserve, &Config->StackCommit);
return;
case KwName:
parseName(&Config->OutputFile, &Config->ImageBase);
return;
case KwVersion:
parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion);
return;
default:
error(Twine("unknown directive: ") + Tok.Value);
}
}
void parseExport() {
Export E;
E.Name = Tok.Value;
read();
if (Tok.K == Equal) {
read();
if (Tok.K != Identifier)
error(Twine("identifier expected, but got ") + Tok.Value);
E.ExtName = E.Name;
E.Name = Tok.Value;
} else {
unget();
}
COFF: Improve dllexported name mangling compatibility. The rules for dllexported symbols are overly complicated due to x86 name decoration, fuzzy symbol resolution, and the fact that one symbol can be resolved by so many different names. The rules are probably intended to be "intuitive", so that users don't have to understand the name mangling schemes, but it seems that it can lead to unintended symbol exports. To make it clear what I'm trying to do with this patch, let me write how the export rules are subtle and complicated. - x86 name decoration: If machine type is i386 and export name is given by a command line option, like /export:foo, the real symbol name the linker has to search for is _foo because all symbols are decorated with "_" prefixes. This doesn't happen on non-x86 machines. This automatic name decoration happens only when the name is not C++ mangled. However, the symbol name exported from DLLs are ones without "_" on all platforms. Moreover, if the option is given via .drectve section, no symbol decoration is done (the reason being that the .drectve section is created by a compiler and the compiler should always know the exact name of the symbol, I guess). - Fuzzy symbol resolution: In addition to x86 name decoration, the linker has to look for cdecl or C++ mangled symbols for a given /export. For example, it searches for not only _foo but also _foo@<number> or ??foo@... for /export:foo. Previous implementation didn't get it right. I'm trying to make it as compatible with MSVC linker as possible with this patch however the rules are. The new code looks a bit messy to me, but I don't think it can be simpler due to the ad-hoc-ness of the rules. llvm-svn: 246424
2015-08-31 16:43:21 +08:00
if (Config->Machine == I386) {
if (!isDecorated(E.Name))
E.Name = Alloc->save("_" + E.Name);
if (!E.ExtName.empty() && !isDecorated(E.ExtName))
E.ExtName = Alloc->save("_" + E.ExtName);
}
for (;;) {
read();
if (Tok.K == Identifier && Tok.Value[0] == '@') {
Tok.Value.drop_front().getAsInteger(10, E.Ordinal);
read();
if (Tok.K == KwNoname) {
E.Noname = true;
} else {
unget();
}
continue;
}
if (Tok.K == KwData) {
E.Data = true;
continue;
}
if (Tok.K == KwPrivate) {
E.Private = true;
continue;
}
unget();
Config->Exports.push_back(E);
return;
}
}
// HEAPSIZE/STACKSIZE reserve[,commit]
void parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
readAsInt(Reserve);
read();
if (Tok.K != Comma) {
unget();
Commit = 0;
return;
}
readAsInt(Commit);
}
// NAME outputPath [BASE=address]
void parseName(std::string *Out, uint64_t *Baseaddr) {
read();
if (Tok.K == Identifier) {
*Out = Tok.Value;
} else {
*Out = "";
unget();
return;
}
read();
if (Tok.K == KwBase) {
expect(Equal, "'=' expected");
readAsInt(Baseaddr);
} else {
unget();
*Baseaddr = 0;
}
}
// VERSION major[.minor]
void parseVersion(uint32_t *Major, uint32_t *Minor) {
read();
if (Tok.K != Identifier)
error(Twine("identifier expected, but got ") + Tok.Value);
StringRef V1, V2;
std::tie(V1, V2) = Tok.Value.split('.');
if (V1.getAsInteger(10, *Major))
error(Twine("integer expected, but got ") + Tok.Value);
if (V2.empty())
*Minor = 0;
else if (V2.getAsInteger(10, *Minor))
error(Twine("integer expected, but got ") + Tok.Value);
}
Lexer Lex;
Token Tok;
std::vector<Token> Stack;
StringSaver *Alloc;
};
} // anonymous namespace
void parseModuleDefs(MemoryBufferRef MB, StringSaver *Alloc) {
Parser(MB.getBuffer(), Alloc).parse();
}
} // namespace coff
} // namespace lld