From e4f385d89448393b4d213339bbbbbbaa42b49489 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 11 Mar 2021 09:34:36 -0800 Subject: [PATCH] [ELF] Support . and $ in symbol names in expressions GNU ld supports `.` and `$` in symbol names while LLD doesn't support them in `readPrimary` expressions. Using `.` can result in such an error: ``` https://github.com/ClangBuiltLinux/linux/issues/1318 ld.lld: error: ./arch/powerpc/kernel/vmlinux.lds:255: malformed number: .TOC. >>> __toc_ptr = (DEFINED (.TOC.) ? .TOC. : ADDR (.got)) + 0x8000; ``` Allow `.` (ppc64 special symbol `.TOC.`) and `$` (RISC-V special symbol `__global_pointer$`). Change `diag[3-5].test` to use an invalid character `^`. Note: GNU ld allows `~` in non-leading positions of a symbol name. `~` is not used in practice, conflicts with the unary operator, and can cause some parsing difficulty, so this patch does not add it. Differential Revision: https://reviews.llvm.org/D98306 --- lld/ELF/ScriptParser.cpp | 9 ++++++++- lld/test/ELF/linkerscript/diag3.test | 6 +++--- lld/test/ELF/linkerscript/diag4.test | 8 ++++---- lld/test/ELF/linkerscript/diag5.test | 6 +++--- lld/test/ELF/linkerscript/symbol-name.test | 8 ++++++++ 5 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 lld/test/ELF/linkerscript/symbol-name.test diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 4b15a71f029b..55093cc64bb9 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -1234,6 +1234,13 @@ static void checkIfExists(OutputSection *cmd, StringRef location) { error(location + ": undefined section " + cmd->name); } +static bool isValidSymbolName(StringRef s) { + auto valid = [](char c) { + return isAlnum(c) || c == '$' || c == '.' || c == '_'; + }; + return !s.empty() && !isDigit(s[0]) && llvm::all_of(s, valid); +} + Expr ScriptParser::readPrimary() { if (peek() == "(") return readParenExpr(); @@ -1408,7 +1415,7 @@ Expr ScriptParser::readPrimary() { return [=] { return *val; }; // Tok is a symbol name. - if (!isValidCIdentifier(tok)) + if (!isValidSymbolName(tok)) setError("malformed number: " + tok); script->referencedSymbols.push_back(tok); return [=] { return script->getSymbolValue(tok, location); }; diff --git a/lld/test/ELF/linkerscript/diag3.test b/lld/test/ELF/linkerscript/diag3.test index 8a0d552b1a89..1e4371241f30 100644 --- a/lld/test/ELF/linkerscript/diag3.test +++ b/lld/test/ELF/linkerscript/diag3.test @@ -5,9 +5,9 @@ SECTIONS { .text : { *(.text) } .keep : { *(.keep) } - boom .temp : { *(.temp) } + boom ^temp : { *(.temp) } } -# CHECK: 8: malformed number: .temp -# CHECK-NEXT: >>> boom .temp : { *(.temp) } +# CHECK: 8: malformed number: ^temp +# CHECK-NEXT: >>> boom ^temp : { *(.temp) } # CHECK-NEXT: >>> ^ diff --git a/lld/test/ELF/linkerscript/diag4.test b/lld/test/ELF/linkerscript/diag4.test index b04b6c6bd8ff..15d81fdc2be4 100644 --- a/lld/test/ELF/linkerscript/diag4.test +++ b/lld/test/ELF/linkerscript/diag4.test @@ -6,9 +6,9 @@ SECTIONS { .text : { *(.text) } .keep : { *(.keep) } - boom .temp : { *(.temp) } + boom ^temp : { *(.temp) } } -# CHECK: 9: malformed number: .temp -# CHECK-NEXT: >>> boom .temp : { *(.temp) } -# CHECK-NEXT: >>> ^ \ No newline at end of file +# CHECK: 9: malformed number: ^temp +# CHECK-NEXT: >>> boom ^temp : { *(.temp) } +# CHECK-NEXT: >>> ^ diff --git a/lld/test/ELF/linkerscript/diag5.test b/lld/test/ELF/linkerscript/diag5.test index ebeee087d65e..15d81fdc2be4 100644 --- a/lld/test/ELF/linkerscript/diag5.test +++ b/lld/test/ELF/linkerscript/diag5.test @@ -6,9 +6,9 @@ SECTIONS { .text : { *(.text) } .keep : { *(.keep) } - boom .temp : { *(.temp) } + boom ^temp : { *(.temp) } } -# CHECK: 9: malformed number: .temp -# CHECK-NEXT: >>> boom .temp : { *(.temp) } +# CHECK: 9: malformed number: ^temp +# CHECK-NEXT: >>> boom ^temp : { *(.temp) } # CHECK-NEXT: >>> ^ diff --git a/lld/test/ELF/linkerscript/symbol-name.test b/lld/test/ELF/linkerscript/symbol-name.test new file mode 100644 index 000000000000..e9755add7950 --- /dev/null +++ b/lld/test/ELF/linkerscript/symbol-name.test @@ -0,0 +1,8 @@ +# REQUIRES: x86 +## Test that . and $ can be used by symbol names in expressions. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o +# RUN: ld.lld -T %s %t.o -o /dev/null + +a0 = DEFINED(.TOC.) ? .TOC. : 0; +a1 = DEFINED(__global_pointer$) ? __global_pointer$ : 0;