Breakpad: Refine record classification code

Previously we would classify all STACK records into a single bucket.
This is not really helpful, because there are three distinct types of
records beginning with the token "STACK" (STACK CFI INIT, STACK CFI,
STACK WIN). To be consistent with how we're treating other records, we
should classify these as three different record types.

This patch also implements the logic to put "STACK CFI INIT" and "STACK CFI"
records into the same "section" of the breakpad file, as they are meant
to be read together (similar to how FUNC and LINE records are treated).

The code which performs actual parsing of these records will come in a
separate patch.

llvm-svn: 357691
This commit is contained in:
Pavel Labath 2019-04-04 13:23:25 +00:00
parent ca58078dc6
commit dfaafbcf4c
7 changed files with 79 additions and 41 deletions

View File

@ -3,3 +3,5 @@ INFO CODE_ID 00000000B52499D1F0F766FFFFFF5DC3
FILE 0 /tmp/a.c
PUBLIC 1010 0 _start
FILE 1 /tmp/b.c
STACK bogus
FILE 2 /tmp/c.c

View File

@ -1,7 +1,7 @@
# Test handling discontiguous sections.
RUN: lldb-test object-file %p/Inputs/discontiguous-sections.syms -contents | FileCheck %s
CHECK: Showing 5 sections
CHECK: Showing 6 sections
CHECK: ID: 0x1
CHECK-NEXT: Name: MODULE
@ -25,3 +25,10 @@ CHECK: File size: 16
CHECK-NEXT: Data: (
CHECK-NEXT: 0000: 46494C45 2031202F 746D702F 622E630A |FILE 1 /tmp/b.c.|
CHECK-NEXT: )
CHECK: ID: 0x6
CHECK-NEXT: Name: FILE
CHECK: File size: 16
CHECK-NEXT: Data: (
CHECK-NEXT: 0000: 46494C45 2032202F 746D702F 632E630A |FILE 2 /tmp/c.c.|
CHECK-NEXT: )

View File

@ -73,7 +73,7 @@ CHECK-NEXT: )
CHECK: Index: 5
CHECK-NEXT: ID: 0x6
CHECK-NEXT: Name: STACK
CHECK-NEXT: Name: STACK CFI INIT
CHECK-NEXT: Type: regular
CHECK-NEXT: Permissions: ---
CHECK-NEXT: Thread specific: no

View File

@ -16,11 +16,14 @@ using namespace lldb_private;
using namespace lldb_private::breakpad;
namespace {
enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack };
enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack, CFI, Init };
}
static Token toToken(llvm::StringRef str) {
return llvm::StringSwitch<Token>(str)
template<typename T>
static T stringTo(llvm::StringRef Str);
template <> Token stringTo<Token>(llvm::StringRef Str) {
return llvm::StringSwitch<Token>(Str)
.Case("MODULE", Token::Module)
.Case("INFO", Token::Info)
.Case("CODE_ID", Token::CodeID)
@ -28,21 +31,25 @@ static Token toToken(llvm::StringRef str) {
.Case("FUNC", Token::Func)
.Case("PUBLIC", Token::Public)
.Case("STACK", Token::Stack)
.Case("CFI", Token::CFI)
.Case("INIT", Token::Init)
.Default(Token::Unknown);
}
static llvm::Triple::OSType toOS(llvm::StringRef str) {
template <>
llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
using llvm::Triple;
return llvm::StringSwitch<Triple::OSType>(str)
return llvm::StringSwitch<Triple::OSType>(Str)
.Case("Linux", Triple::Linux)
.Case("mac", Triple::MacOSX)
.Case("windows", Triple::Win32)
.Default(Triple::UnknownOS);
}
static llvm::Triple::ArchType toArch(llvm::StringRef str) {
template <>
llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
using llvm::Triple;
return llvm::StringSwitch<Triple::ArchType>(str)
return llvm::StringSwitch<Triple::ArchType>(Str)
.Case("arm", Triple::arm)
.Case("arm64", Triple::aarch64)
.Case("mips", Triple::mips)
@ -56,6 +63,13 @@ static llvm::Triple::ArchType toArch(llvm::StringRef str) {
.Default(Triple::UnknownArch);
}
/// Split the leading token off Str and convert it to a value of type T.
///
/// The token and the remaining text are both obtained from getToken(); Str is
/// updated in place to refer to the remainder, so repeated calls step through
/// a record one token at a time. The conversion itself is delegated to the
/// stringTo<T> specialization for the requested type.
template<typename T>
static T consume(llvm::StringRef &Str) {
// Note: this local is named Token and shadows the Token enum within this
// function body.
llvm::StringRef Token;
std::tie(Token, Str) = getToken(Str);
return stringTo<T>(Token);
}
/// Return the number of hex digits needed to encode an (POD) object of a given
/// type.
template <typename T> static constexpr size_t hex_digits() {
@ -112,8 +126,8 @@ static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
: sizeof(data.uuid));
}
Record::Kind Record::classify(llvm::StringRef Line) {
Token Tok = toToken(getToken(Line).first);
llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
Token Tok = consume<Token>(Line);
switch (Tok) {
case Token::Module:
return Record::Module;
@ -126,36 +140,45 @@ Record::Kind Record::classify(llvm::StringRef Line) {
case Token::Public:
return Record::Public;
case Token::Stack:
return Record::Stack;
Tok = consume<Token>(Line);
switch (Tok) {
case Token::CFI:
Tok = consume<Token>(Line);
return Tok == Token::Init ? Record::StackCFIInit : Record::StackCFI;
default:
return llvm::None;
}
case Token::CodeID:
case Token::Unknown:
// Optimistically assume that any unrecognised token means this is a line
// record, those don't have a special keyword and start directly with a
// hex number. CODE_ID should never be at the start of a line, but if it
// is, it can be treated the same way as a garbled line record.
return Record::Line;
case Token::CodeID:
case Token::CFI:
case Token::Init:
// These should never appear at the start of a valid record.
return llvm::None;
}
llvm_unreachable("Fully covered switch above!");
}
llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
// MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
if (toToken(Str) != Token::Module)
if (consume<Token>(Line) != Token::Module)
return llvm::None;
std::tie(Str, Line) = getToken(Line);
llvm::Triple::OSType OS = toOS(Str);
llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
if (OS == llvm::Triple::UnknownOS)
return llvm::None;
std::tie(Str, Line) = getToken(Line);
llvm::Triple::ArchType Arch = toArch(Str);
llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
if (Arch == llvm::Triple::UnknownArch)
return llvm::None;
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
UUID ID = parseModuleId(OS, Str);
if (!ID)
@ -173,15 +196,13 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
// INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
if (consume<Token>(Line) != Token::Info)
return llvm::None;
if (consume<Token>(Line) != Token::CodeID)
return llvm::None;
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
if (toToken(Str) != Token::Info)
return llvm::None;
std::tie(Str, Line) = getToken(Line);
if (toToken(Str) != Token::CodeID)
return llvm::None;
std::tie(Str, Line) = getToken(Line);
// If we don't have any text following the code ID (e.g. on linux), we should
// use this as the UUID. Otherwise, we should revert back to the module ID.
@ -200,11 +221,10 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
// FILE number name
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
if (toToken(Str) != Token::File)
if (consume<Token>(Line) != Token::File)
return llvm::None;
llvm::StringRef Str;
size_t Number;
std::tie(Str, Line) = getToken(Line);
if (!to_integer(Str, Number))
@ -231,11 +251,10 @@ static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
Token Tok = Size ? Token::Func : Token::Public;
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
if (toToken(Str) != Tok)
if (consume<Token>(Line) != Tok)
return false;
llvm::StringRef Str;
std::tie(Str, Line) = getToken(Line);
Multiple = Str == "m";
@ -354,8 +373,10 @@ llvm::StringRef breakpad::toString(Record::Kind K) {
return "LINE";
case Record::Public:
return "PUBLIC";
case Record::Stack:
return "STACK";
case Record::StackCFIInit:
return "STACK CFI INIT";
case Record::StackCFI:
return "STACK CFI";
}
llvm_unreachable("Unknown record kind!");
}

View File

@ -20,12 +20,12 @@ namespace breakpad {
class Record {
public:
enum Kind { Module, Info, File, Func, Line, Public, Stack };
enum Kind { Module, Info, File, Func, Line, Public, StackCFIInit, StackCFI };
/// Attempt to guess the kind of the record present in the argument without
/// doing a full parse. The returned kind will always be correct for valid
/// records, but the full parse can still fail in case of corrupted input.
static Kind classify(llvm::StringRef Line);
static llvm::Optional<Kind> classify(llvm::StringRef Line);
protected:
Record(Kind K) : TheKind(K) {}

View File

@ -148,11 +148,14 @@ void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
llvm::StringRef line;
std::tie(line, text) = text.split('\n');
Record::Kind next_section = Record::classify(line);
llvm::Optional<Record::Kind> next_section = Record::classify(line);
if (next_section == Record::Line) {
// Line records logically belong to the preceding Func record, so we put
// them in the same section.
next_section = Record::Func;
} else if (next_section == Record::StackCFI) {
// Same goes for StackCFI and StackCFIInit
next_section = Record::StackCFIInit;
}
if (next_section == current_section)
continue;

View File

@ -19,13 +19,18 @@ TEST(Record, classify) {
EXPECT_EQ(Record::File, Record::classify("FILE"));
EXPECT_EQ(Record::Func, Record::classify("FUNC"));
EXPECT_EQ(Record::Public, Record::classify("PUBLIC"));
EXPECT_EQ(Record::Stack, Record::classify("STACK"));
EXPECT_EQ(Record::StackCFIInit, Record::classify("STACK CFI INIT"));
EXPECT_EQ(Record::StackCFI, Record::classify("STACK CFI"));
// Any obviously incorrect lines will be classified as such.
EXPECT_EQ(llvm::None, Record::classify("STACK"));
EXPECT_EQ(llvm::None, Record::classify("STACK CODE_ID"));
EXPECT_EQ(llvm::None, Record::classify("CODE_ID"));
// Any line which does not start with a known keyword will be classified as a
// line record, as those are the only ones that start without a keyword.
EXPECT_EQ(Record::Line, Record::classify("deadbeef"));
EXPECT_EQ(Record::Line, Record::classify("12"));
EXPECT_EQ(Record::Line, Record::classify("CODE_ID"));
}
TEST(ModuleRecord, parse) {