Breakpad: Refine record classification code

Previously we would classify all STACK records into a single bucket. This is not really helpful, because there are three distinct types of records beginning with the token "STACK" (STACK CFI INIT, STACK CFI, STACK WIN). To be consistent with how we treat other records, each of these should be classified as its own record type. This patch introduces separate kinds for STACK CFI INIT and STACK CFI; any other line starting with STACK (including STACK WIN, for now) is classified as llvm::None. The patch also implements the logic to put "STACK CFI INIT" and "STACK CFI" records into the same "section" of the breakpad file, as they are meant to be read together (similar to how FUNC and LINE records are treated). The code which performs the actual parsing of these records will come in a separate patch. llvm-svn: 357691
2019-04-04 13:23:25 +00:00 · 2019-04-04 13:23:25 +00:00 · dfaafbcf4c
parent ca58078dc6
commit dfaafbcf4c
7 changed files with 79 additions and 41 deletions
--- a/lldb/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms
+++ b/lldb/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms
@ -3,3 +3,5 @@ INFO CODE_ID 00000000B52499D1F0F766FFFFFF5DC3
 FILE 0 /tmp/a.c
 PUBLIC 1010 0 _start
 FILE 1 /tmp/b.c
+STACK bogus
+FILE 2 /tmp/c.c
--- a/lldb/lit/Modules/Breakpad/discontiguous-sections.test
+++ b/lldb/lit/Modules/Breakpad/discontiguous-sections.test
@ -1,7 +1,7 @@
 # Test handling discontiguous sections.
 RUN: lldb-test object-file %p/Inputs/discontiguous-sections.syms -contents | FileCheck %s

-CHECK: Showing 5 sections
+CHECK: Showing 6 sections

 CHECK:        ID: 0x1
 CHECK-NEXT:   Name: MODULE
@ -25,3 +25,10 @@ CHECK:        File size: 16
 CHECK-NEXT:   Data:  (
 CHECK-NEXT:       0000: 46494C45 2031202F 746D702F 622E630A                                      |FILE 1 /tmp/b.c.|
 CHECK-NEXT:   )
+
+CHECK:        ID: 0x6
+CHECK-NEXT:   Name: FILE
+CHECK:        File size: 16
+CHECK-NEXT:   Data:  (
+CHECK-NEXT:       0000: 46494C45 2032202F 746D702F 632E630A                                      |FILE 2 /tmp/c.c.|
+CHECK-NEXT:   )
--- a/lldb/lit/Modules/Breakpad/sections.test
+++ b/lldb/lit/Modules/Breakpad/sections.test
@ -73,7 +73,7 @@ CHECK-NEXT:   )

 CHECK:        Index: 5
 CHECK-NEXT:   ID: 0x6
-CHECK-NEXT:   Name: STACK
+CHECK-NEXT:   Name: STACK CFI INIT
 CHECK-NEXT:   Type: regular
 CHECK-NEXT:   Permissions: ---
 CHECK-NEXT:   Thread specific: no
--- a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
@ -16,11 +16,14 @@ using namespace lldb_private;
 using namespace lldb_private::breakpad;

 namespace {
-enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack };
+enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack, CFI, Init };
 }

-static Token toToken(llvm::StringRef str) {
-  return llvm::StringSwitch<Token>(str)
+template<typename T>
+static T stringTo(llvm::StringRef Str);
+
+template <> Token stringTo<Token>(llvm::StringRef Str) {
+  return llvm::StringSwitch<Token>(Str)
      .Case("MODULE", Token::Module)
      .Case("INFO", Token::Info)
      .Case("CODE_ID", Token::CodeID)
@ -28,21 +31,25 @@ static Token toToken(llvm::StringRef str) {
      .Case("FUNC", Token::Func)
      .Case("PUBLIC", Token::Public)
      .Case("STACK", Token::Stack)
+      .Case("CFI", Token::CFI)
+      .Case("INIT", Token::Init)
      .Default(Token::Unknown);
 }

-static llvm::Triple::OSType toOS(llvm::StringRef str) {
+template <>
+llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
  using llvm::Triple;
-  return llvm::StringSwitch<Triple::OSType>(str)
+  return llvm::StringSwitch<Triple::OSType>(Str)
      .Case("Linux", Triple::Linux)
      .Case("mac", Triple::MacOSX)
      .Case("windows", Triple::Win32)
      .Default(Triple::UnknownOS);
 }

-static llvm::Triple::ArchType toArch(llvm::StringRef str) {
+template <>
+llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
  using llvm::Triple;
-  return llvm::StringSwitch<Triple::ArchType>(str)
+  return llvm::StringSwitch<Triple::ArchType>(Str)
      .Case("arm", Triple::arm)
      .Case("arm64", Triple::aarch64)
      .Case("mips", Triple::mips)
@ -56,6 +63,13 @@ static llvm::Triple::ArchType toArch(llvm::StringRef str) {
      .Default(Triple::UnknownArch);
 }

+template<typename T>
+static T consume(llvm::StringRef &Str) {
+  llvm::StringRef Token;
+  std::tie(Token, Str) = getToken(Str);
+  return stringTo<T>(Token);
+}
+
 /// Return the number of hex digits needed to encode an (POD) object of a given
 /// type.
 template <typename T> static constexpr size_t hex_digits() {
@ -112,8 +126,8 @@ static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
                                                         : sizeof(data.uuid));
 }

-Record::Kind Record::classify(llvm::StringRef Line) {
-  Token Tok = toToken(getToken(Line).first);
+llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
+  Token Tok = consume<Token>(Line);
  switch (Tok) {
  case Token::Module:
    return Record::Module;
@ -126,36 +140,45 @@ Record::Kind Record::classify(llvm::StringRef Line) {
  case Token::Public:
    return Record::Public;
  case Token::Stack:
-    return Record::Stack;
+    Tok = consume<Token>(Line);
+    switch (Tok) {
+    case Token::CFI:
+      Tok = consume<Token>(Line);
+      return Tok == Token::Init ? Record::StackCFIInit : Record::StackCFI;
+    default:
+      return llvm::None;
+    }

-  case Token::CodeID:
  case Token::Unknown:
    // Optimistically assume that any unrecognised token means this is a line
    // record, those don't have a special keyword and start directly with a
    // hex number. CODE_ID is no longer handled here: like CFI and INIT, it
    // never starts a valid record and is rejected in the cases below.
    return Record::Line;
+
+  case Token::CodeID:
+  case Token::CFI:
+  case Token::Init:
+    // These should never appear at the start of a valid record.
+    return llvm::None;
  }
  llvm_unreachable("Fully covered switch above!");
 }

 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
  // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::Module)
+  if (consume<Token>(Line) != Token::Module)
    return llvm::None;

-  std::tie(Str, Line) = getToken(Line);
-  llvm::Triple::OSType OS = toOS(Str);
+  llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
  if (OS == llvm::Triple::UnknownOS)
    return llvm::None;

-  std::tie(Str, Line) = getToken(Line);
-  llvm::Triple::ArchType Arch = toArch(Str);
+  llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
  if (Arch == llvm::Triple::UnknownArch)
    return llvm::None;

+  llvm::StringRef Str;
  std::tie(Str, Line) = getToken(Line);
  UUID ID = parseModuleId(OS, Str);
  if (!ID)
@ -173,15 +196,13 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,

 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
  // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
+  if (consume<Token>(Line) != Token::Info)
+    return llvm::None;
+
+  if (consume<Token>(Line) != Token::CodeID)
+    return llvm::None;
+
  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::Info)
-    return llvm::None;
-
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::CodeID)
-    return llvm::None;
-
  std::tie(Str, Line) = getToken(Line);
  // If we don't have any text following the code ID (e.g. on linux), we should
  // use this as the UUID. Otherwise, we should revert back to the module ID.
@ -200,11 +221,10 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,

 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
  // FILE number name
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::File)
+  if (consume<Token>(Line) != Token::File)
    return llvm::None;

+  llvm::StringRef Str;
  size_t Number;
  std::tie(Str, Line) = getToken(Line);
  if (!to_integer(Str, Number))
@ -231,11 +251,10 @@ static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,

  Token Tok = Size ? Token::Func : Token::Public;

-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Tok)
+  if (consume<Token>(Line) != Tok)
    return false;

+  llvm::StringRef Str;
  std::tie(Str, Line) = getToken(Line);
  Multiple = Str == "m";

@ -354,8 +373,10 @@ llvm::StringRef breakpad::toString(Record::Kind K) {
    return "LINE";
  case Record::Public:
    return "PUBLIC";
-  case Record::Stack:
-    return "STACK";
+  case Record::StackCFIInit:
+    return "STACK CFI INIT";
+  case Record::StackCFI:
+    return "STACK CFI";
  }
  llvm_unreachable("Unknown record kind!");
 }
--- a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
@ -20,12 +20,12 @@ namespace breakpad {

 class Record {
 public:
-  enum Kind { Module, Info, File, Func, Line, Public, Stack };
+  enum Kind { Module, Info, File, Func, Line, Public, StackCFIInit, StackCFI };

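  /// Attempt to guess the kind of the record present in the argument without
  /// doing a full parse. The returned kind will always be correct for valid
  /// records, but the full parse can still fail in case of corrupted input.
  /// Returns llvm::None for lines that are obviously not a valid record, such
  /// as a bare "STACK" or a line starting with "CODE_ID".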
-  static Kind classify(llvm::StringRef Line);
+  static llvm::Optional<Kind> classify(llvm::StringRef Line);

 protected:
  Record(Kind K) : TheKind(K) {}
--- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
@ -148,11 +148,14 @@ void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
    llvm::StringRef line;
    std::tie(line, text) = text.split('\n');

-    Record::Kind next_section = Record::classify(line);
+    llvm::Optional<Record::Kind> next_section = Record::classify(line);
    if (next_section == Record::Line) {
      // Line records logically belong to the preceding Func record, so we put
      // them in the same section.
      next_section = Record::Func;
+    } else if (next_section == Record::StackCFI) {
+      // Likewise, StackCFI records are read together with the StackCFIInit
+      // record, so they go into the same section.
+      next_section = Record::StackCFIInit;
    }
    if (next_section == current_section)
      continue;
--- a/lldb/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp
+++ b/lldb/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp
@ -19,13 +19,18 @@ TEST(Record, classify) {
  EXPECT_EQ(Record::File, Record::classify("FILE"));
  EXPECT_EQ(Record::Func, Record::classify("FUNC"));
  EXPECT_EQ(Record::Public, Record::classify("PUBLIC"));
-  EXPECT_EQ(Record::Stack, Record::classify("STACK"));
+  EXPECT_EQ(Record::StackCFIInit, Record::classify("STACK CFI INIT"));
+  EXPECT_EQ(Record::StackCFI, Record::classify("STACK CFI"));
+
+  // Lines that are obviously not valid records are classified as llvm::None.
+  EXPECT_EQ(llvm::None, Record::classify("STACK"));
+  EXPECT_EQ(llvm::None, Record::classify("STACK CODE_ID"));
+  EXPECT_EQ(llvm::None, Record::classify("CODE_ID"));

  // Any line which does not start with a known keyword will be classified as a
  // line record, as those are the only ones that start without a keyword.
  EXPECT_EQ(Record::Line, Record::classify("deadbeef"));
  EXPECT_EQ(Record::Line, Record::classify("12"));
-  EXPECT_EQ(Record::Line, Record::classify("CODE_ID"));
 }

 TEST(ModuleRecord, parse) {