[LLD][ELF] Add support for INPUT_SECTION_FLAGS

The INPUT_SECTION_FLAGS linker script command is used to constrain the
section pattern matching to sections that match certain combinations of
flags.

There are two ways to express the constraint:
  withFlags: Section must have these flags.
  withoutFlags: Section must not have these flags.

The syntax of the command is:
  INPUT_SECTION_FLAGS '(' sect_flag_list ')'
  sect_flag_list: NAME
                | sect_flag_list '&' NAME

where NAME matches a section flag name such as SHF_EXECINSTR, or the integer
value of a section flag. If the first character of NAME is ! then the section
must not contain that flag.

We do not support the rare case of { INPUT_SECTION_FLAGS(flags) filespec }
where filespec has no input section description like (.text).

As an example from the ld man page:
  SECTIONS {
    .text : { INPUT_SECTION_FLAGS (SHF_MERGE & SHF_STRINGS) *(.text) }
    .text2 : { INPUT_SECTION_FLAGS (!SHF_WRITE) *(.text) }
  }

.text will match sections called .text that have both the SHF_MERGE and
SHF_STRINGS flags. .text2 will match sections called .text that don't have
the SHF_WRITE flag.

The flag names accepted are those generic to all targets, plus
SHF_ARM_PURECODE, as it is very useful to filter all the pure code sections
into a single program header that can be marked execute never.

Fixes PR44265

Differential Revision: https://reviews.llvm.org/D72756
2020-01-15 09:38:00 +00:00 · 2020-01-15 09:38:00 +00:00 · dbd0ad3366
parent 6b4f86f65f
commit dbd0ad3366
8 changed files with 284 additions and 13 deletions
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@ -335,7 +335,9 @@ bool LinkerScript::shouldKeep(InputSectionBase *s) {
  for (InputSectionDescription *id : keptSections)
    if (id->filePat.match(filename))
      for (SectionPattern &p : id->sectionPatterns)
-        if (p.sectionPat.match(s->name))
+        if (p.sectionPat.match(s->name) &&
+            (s->flags & id->withFlags) == id->withFlags &&
+            (s->flags & id->withoutFlags) == 0)
          return true;
  return false;
 }
@ -431,7 +433,10 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) {
        continue;

      std::string filename = getFilename(sec->file);
-      if (!cmd->filePat.match(filename) || pat.excludedFilePat.match(filename))
+      if (!cmd->filePat.match(filename) ||
+          pat.excludedFilePat.match(filename) ||
+          (sec->flags & cmd->withFlags) != cmd->withFlags ||
+          (sec->flags & cmd->withoutFlags) != 0)
        continue;

      ret.push_back(sec);
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@ -155,8 +155,10 @@ struct SectionPattern {
 };

 struct InputSectionDescription : BaseCommand {
-  InputSectionDescription(StringRef filePattern)
-      : BaseCommand(InputSectionKind), filePat(filePattern) {}
+  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
+                          uint64_t withoutFlags = 0)
+      : BaseCommand(InputSectionKind), filePat(filePattern),
+        withFlags(withFlags), withoutFlags(withoutFlags) {}

  static bool classof(const BaseCommand *c) {
    return c->kind == InputSectionKind;
@ -180,6 +182,10 @@ struct InputSectionDescription : BaseCommand {
  // they were created in. This is used to insert newly created ThunkSections
  // into Sections at the end of a createThunks() pass.
  std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
+
+  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
+  uint64_t withFlags;
+  uint64_t withoutFlags;
 };

 // Represents BYTE(), SHORT(), LONG(), or QUAD().
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@ -30,6 +30,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include <cassert>
 #include <limits>
 #include <vector>
@ -91,10 +92,13 @@ private:
  OutputSection *readOutputSectionDescription(StringRef outSec);
  std::vector<BaseCommand *> readOverlay();
  std::vector<StringRef> readOutputSectionPhdrs();
+  std::pair<uint64_t, uint64_t> readInputSectionFlags();
  InputSectionDescription *readInputSectionDescription(StringRef tok);
  StringMatcher readFilePatterns();
  std::vector<SectionPattern> readInputSectionsList();
-  InputSectionDescription *readInputSectionRules(StringRef filePattern);
+  InputSectionDescription *readInputSectionRules(StringRef filePattern,
+                                                 uint64_t withFlags,
+                                                 uint64_t withoutFlags);
  unsigned readPhdrType();
  SortSectionPolicy readSortKind();
  SymbolAssignment *readProvideHidden(bool provide, bool hidden);
@ -657,8 +661,10 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
 //
 // <section-list> is parsed by readInputSectionsList().
 InputSectionDescription *
-ScriptParser::readInputSectionRules(StringRef filePattern) {
-  auto *cmd = make<InputSectionDescription>(filePattern);
+ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
+                                    uint64_t withoutFlags) {
+  auto *cmd =
+      make<InputSectionDescription>(filePattern, withFlags, withoutFlags);
  expect("(");

  while (!errorCount() && !consume(")")) {
@ -694,15 +700,23 @@ InputSectionDescription *
 ScriptParser::readInputSectionDescription(StringRef tok) {
  // Input section wildcard can be surrounded by KEEP.
  // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
+  uint64_t withFlags = 0;
+  uint64_t withoutFlags = 0;
  if (tok == "KEEP") {
    expect("(");
-    StringRef filePattern = next();
-    InputSectionDescription *cmd = readInputSectionRules(filePattern);
+    if (consume("INPUT_SECTION_FLAGS"))
+      std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    InputSectionDescription *cmd =
+        readInputSectionRules(next(), withFlags, withoutFlags);
    expect(")");
    script->keptSections.push_back(cmd);
    return cmd;
  }
-  return readInputSectionRules(tok);
+  if (tok == "INPUT_SECTION_FLAGS") {
+    std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    tok = next();
+  }
+  return readInputSectionRules(tok, withFlags, withoutFlags);
 }

 void ScriptParser::readSort() {
@ -781,9 +795,14 @@ OutputSection *ScriptParser::readOverlaySectionDescription() {
      script->createOutputSection(next(), getCurrentLocation());
  cmd->inOverlay = true;
  expect("{");
-  while (!errorCount() && !consume("}"))
-    cmd->sectionCommands.push_back(readInputSectionRules(next()));
-  cmd->phdrs = readOutputSectionPhdrs();
+  while (!errorCount() && !consume("}")) {
+    uint64_t withFlags = 0;
+    uint64_t withoutFlags = 0;
+    if (consume("INPUT_SECTION_FLAGS"))
+      std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    cmd->sectionCommands.push_back(
+        readInputSectionRules(next(), withFlags, withoutFlags));
+  }
  return cmd;
 }

@ -841,6 +860,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
      // We have a file name and no input sections description. It is not a
      // commonly used syntax, but still acceptable. In that case, all sections
      // from the file will be included.
+      // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not
+      // handle this case here as it will already have been matched by the
+      // case above.
      auto *isd = make<InputSectionDescription>(tok);
      isd->sectionPatterns.push_back({{}, StringMatcher({"*"})});
      cmd->sectionCommands.push_back(isd);
@ -1102,6 +1124,63 @@ ByteCommand *ScriptParser::readByteCommand(StringRef tok) {
  return make<ByteCommand>(e, size, commandString);
 }

+// Converts a single INPUT_SECTION_FLAGS operand into its numeric flag value.
+// An operand that parseInt accepts is returned as-is; otherwise it has to be
+// one of the SHF_* names listed below. Anything else produces None.
+static llvm::Optional<uint64_t> parseFlag(StringRef tok) {
+  llvm::Optional<uint64_t> numeric = parseInt(tok);
+  if (numeric)
+    return numeric;
+
+  struct NamedFlag {
+    StringRef name;
+    uint64_t value;
+  };
+  const NamedFlag knownFlags[] = {
+      {"SHF_WRITE", ELF::SHF_WRITE},
+      {"SHF_ALLOC", ELF::SHF_ALLOC},
+      {"SHF_EXECINSTR", ELF::SHF_EXECINSTR},
+      {"SHF_MERGE", ELF::SHF_MERGE},
+      {"SHF_STRINGS", ELF::SHF_STRINGS},
+      {"SHF_INFO_LINK", ELF::SHF_INFO_LINK},
+      {"SHF_LINK_ORDER", ELF::SHF_LINK_ORDER},
+      {"SHF_OS_NONCONFORMING", ELF::SHF_OS_NONCONFORMING},
+      {"SHF_GROUP", ELF::SHF_GROUP},
+      {"SHF_TLS", ELF::SHF_TLS},
+      {"SHF_COMPRESSED", ELF::SHF_COMPRESSED},
+      {"SHF_EXCLUDE", ELF::SHF_EXCLUDE},
+      {"SHF_ARM_PURECODE", ELF::SHF_ARM_PURECODE},
+  };
+  // Names are matched exactly and case-sensitively.
+  for (const NamedFlag &entry : knownFlags)
+    if (tok == entry.name)
+      return entry.value;
+  return None;
+}
+
+// Parses the parenthesised operand list of INPUT_SECTION_FLAGS. Grammar:
+//   <flags> ::= <flag>
+//             | <flags> & <flag>
+//   <flag>  ::= a recognised flag name, or the integer value of a flag
+// A <flag> that begins with '!' must be absent from a matching section; any
+// other <flag> must be present. For example, SHF_EXECINSTR & !SHF_WRITE
+// selects sections with SHF_EXECINSTR set and SHF_WRITE clear.
+// Returns the pair (withFlags, withoutFlags).
+std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
+  uint64_t required = 0;
+  uint64_t forbidden = 0;
+  expect("(");
+  while (!errorCount()) {
+    StringRef name = unquote(next());
+    const bool negated = name.consume_front("!");
+    llvm::Optional<uint64_t> bits = parseFlag(name);
+    if (!bits)
+      setError("unrecognised flag: " + name);
+    else if (negated)
+      forbidden |= *bits;
+    else
+      required |= *bits;
+
+    // A closing parenthesis ends the list; otherwise '&' must introduce the
+    // next flag.
+    if (consume(")"))
+      break;
+    if (consume("&"))
+      continue;
+    next();
+    setError("expected & or )");
+  }
+  return {required, forbidden};
+}
+
 StringRef ScriptParser::readParenLiteral() {
  expect("(");
  bool orig = inExpr;
--- a/lld/test/ELF/input-section-flags-diag1.test
+++ b/lld/test/ELF/input-section-flags-diag1.test
@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that an unrecognised section flag is diagnosed.
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(UNKNOWN_FLAG) *(.text) }
+}
+
+# CHECK: unrecognised flag: UNKNOWN_FLAG
+# CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(UNKNOWN_FLAG) *(.text) }
+# CHECK-NEXT: >>>                                ^
--- a/lld/test/ELF/input-section-flags-diag2.test
+++ b/lld/test/ELF/input-section-flags-diag2.test
@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that we start with a flag
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(& SHF_ALLOC) *(.text) }
+}
+
+# CHECK:  unrecognised flag: &
+# CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(& SHF_ALLOC) *(.text) }
+# CHECK-NEXT: >>>                                ^
--- a/lld/test/ELF/input-section-flags-diag3.test
+++ b/lld/test/ELF/input-section-flags-diag3.test
@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that flags are separated by &
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(SHF_ALLOC SHF_EXECINSTR) *(.text) }
+}
+
+# CHECK: expected & or )
+# CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(SHF_ALLOC SHF_EXECINSTR) *(.text) }
+# CHECK-NEXT: >>>                                          ^
--- a/lld/test/ELF/input-section-flags-keep.s
+++ b/lld/test/ELF/input-section-flags-keep.s
@ -0,0 +1,27 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo "SECTIONS { \
+# RUN:  . = SIZEOF_HEADERS; \
+# RUN:  .keep : { KEEP( INPUT_SECTION_FLAGS(!SHF_WRITE) *(.sec*)) } \
+# RUN:  }" > %t.script
+# RUN: ld.lld --gc-sections -o %t --script %t.script %t.o
+# RUN: llvm-readobj --symbols %t | FileCheck %s
+
+## Check that INPUT_SECTION_FLAGS can be used within KEEP, and affects what
+## is kept.
+# CHECK: Name: keep
+# CHECK-NOT: Name: collect
+.text
+.global _start
+_start:
+ .long 0
+
+.section .sec1, "a"
+.global keep
+keep:
+ .long 1
+
+.section .sec2, "aw"
+.global collect
+collect:
+ .long 2
--- a/lld/test/ELF/input-section-flags.s
+++ b/lld/test/ELF/input-section-flags.s
@ -0,0 +1,115 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+
+## Test the INPUT_SECTION_FLAGS feature. It prefixes an input section list and
+## restricts matches to sections that have the required flags and do not have
+## any of the must not have flags.
+
+## Uniquely identify each .sec section by flag alone, with .text going into
+## the SHF_EXECINSTR requiring .outsec2
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                  !SHF_WRITE & !SHF_MERGE) *(.sec.*) } \
+# RUN: .outsec2 : { INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE\
+# RUN:                                  & !SHF_MERGE) *(.sec.* .text) } \
+# RUN: .outsec3 : { INPUT_SECTION_FLAGS(SHF_WRITE) *(.sec.*) } \
+# RUN: .outsec4 : { INPUT_SECTION_FLAGS(SHF_MERGE & !SHF_STRINGS) *(.sec.*) } \
+# RUN: .outsec5 : { INPUT_SECTION_FLAGS(SHF_STRINGS) *(.sec.*) } \
+# RUN: } " > %t.script
+# RUN: ld.lld -o %t1 --script %t.script %t.o
+# RUN: llvm-readobj --symbols %t1 | FileCheck %s
+# CHECK:  Name: _start
+# CHECK:  Section: .outsec2
+# CHECK:  Name: s1
+# CHECK:  Section: .outsec1
+# CHECK:  Name: s2
+# CHECK:  Section: .outsec2
+# CHECK:  Name: s3
+# CHECK:  Section: .outsec3
+# CHECK:  Name: s4
+# CHECK:  Section: .outsec4
+# CHECK:  Name: s5
+# CHECK:  Section: .outsec5
+
+## Same test but using OVERLAY.
+# RUN: echo "SECTIONS { \
+# RUN: OVERLAY 0x1000 : AT ( 0x4000 ) { \
+# RUN: .outsec1 { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                !SHF_WRITE & !SHF_MERGE) *(.sec.*) }\
+# RUN: .outsec2 { INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE \
+# RUN:                                & !SHF_MERGE) *(.sec.* .text) } \
+# RUN: .outsec3 { INPUT_SECTION_FLAGS(SHF_WRITE) *(.sec.*) } \
+# RUN: .outsec4 { INPUT_SECTION_FLAGS(SHF_MERGE & !SHF_STRINGS) *(.sec.*) } \
+# RUN: .outsec5 { INPUT_SECTION_FLAGS(SHF_STRINGS) *(.sec.*) } \
+# RUN: } } " > %t2.script
+
+# RUN: ld.lld -o %t2 --script %t2.script %t.o
+# RUN: llvm-readobj --symbols %t2 | FileCheck %s
+
+## Same test but using hex representations of the flags.
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(0x2 & !0x4 & !0x1 & !0x10) *(.sec.*) }\
+# RUN: .outsec2 : { INPUT_SECTION_FLAGS(0x2 & 0x4 & !0x1 & !0x10) \
+# RUN:              *(.sec.* .text) } \
+# RUN: .outsec3 : { INPUT_SECTION_FLAGS(0x1) *(.sec.*) } \
+# RUN: .outsec4 : { INPUT_SECTION_FLAGS(0x10 & !0x20) *(.sec.*) } \
+# RUN: .outsec5 : { INPUT_SECTION_FLAGS(0x20) *(.sec.*) } \
+# RUN: } " > %t3.script
+
+# RUN: ld.lld -o %t3 --script %t3.script %t.o
+# RUN: llvm-readobj --symbols %t3 | FileCheck %s
+
+## Check that we can handle multiple InputSectionDescriptions in a single
+## OutputSection
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                  !SHF_WRITE & !SHF_MERGE) *(.sec.*) ; \
+# RUN:              INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE\
+# RUN:                                  & !SHF_MERGE)  *(.sec.* *.text) }\
+# RUN: } " > %t4.script
+
+# RUN: ld.lld -o %t4 --script %t4.script %t.o
+# RUN: llvm-readobj --symbols %t4 | FileCheck --check-prefix MULTIPLE %s
+
+# MULTIPLE:  Name: _start
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s1
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s2
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s3
+# MULTIPLE:  Section: .sec.aw
+# MULTIPLE:  Name: s4
+# MULTIPLE:  Section: .sec.aM
+# MULTIPLE:  Name: s5
+# MULTIPLE:  Section: .sec.aMS
+
+ .text
+ .global _start
+_start:
+ nop
+
+ .section .sec.a, "a", @progbits
+ .globl s1
+s1:
+ .long 1
+
+ .section .sec.ax, "ax", @progbits
+ .globl s2
+s2:
+ .long 2
+
+ .section .sec.aw, "aw", @progbits
+ .globl s3
+s3:
+ .long 3
+
+ .section .sec.aM, "aM", @progbits, 4
+ .globl s4
+s4:
+ .long 4
+
+ .section .sec.aMS, "aMS", @progbits, 1
+ .globl s5
+s5:
+ .asciz "a"