[Symbolizer] Add Build ID flag to llvm-symbolizer.

This adds a --build-id=<hex build ID> flag to llvm-symbolizer. If --obj
is unspecified, this will attempt to look up the provided build ID using
whatever mechanisms are available to the Symbolizer (typically,
debuginfod). The semantics are then as if the found binary were given
using the --obj flag.

Reviewed By: jhenderson, phosek

Differential Revision: https://reviews.llvm.org/D118633
This commit is contained in:
Daniel Thornburgh 2022-01-21 00:13:52 +00:00
parent bbddd19ec7
commit dcd4950d42
9 changed files with 179 additions and 38 deletions

View File

@ -183,6 +183,11 @@ OPTIONS
Print just the file's name without any directories, instead of the
absolute path.
.. option:: --build-id
Look up the object using the given build ID, specified as a hexadecimal
string. Mutually exclusive with :option:`--obj`.
.. _llvm-symbolizer-opt-C:
.. option:: --demangle, -C
@ -232,7 +237,8 @@ OPTIONS
.. option:: --obj <path>, --exe, -e
Path to object file to be symbolized. If ``-`` is specified, read the object
directly from the standard input stream.
directly from the standard input stream. Mutually exclusive with
:option:`--build-id`.
.. _llvm-symbolizer-opt-output-style:

View File

@ -13,6 +13,7 @@
#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/Object/Binary.h"
@ -62,22 +63,32 @@ public:
object::SectionedAddress ModuleOffset);
Expected<DILineInfo> symbolizeCode(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
/// Same as the ModuleName overload above, except that the module to
/// symbolize against is identified by its build ID instead of a module name.
Expected<DILineInfo> symbolizeCode(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);
Expected<DIInliningInfo>
symbolizeInlinedCode(const ObjectFile &Obj,
object::SectionedAddress ModuleOffset);
Expected<DIInliningInfo>
symbolizeInlinedCode(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
/// Same as the ModuleName overload above, except that the module to
/// symbolize against is identified by its build ID instead of a module name.
Expected<DIInliningInfo>
symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);
Expected<DIGlobal> symbolizeData(const ObjectFile &Obj,
object::SectionedAddress ModuleOffset);
Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
/// Same as the ModuleName overload above, except that the module to
/// symbolize against is identified by its build ID instead of a module name.
Expected<DIGlobal> symbolizeData(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);
Expected<std::vector<DILocal>>
symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset);
Expected<std::vector<DILocal>>
symbolizeFrame(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
/// Same as the ModuleName overload above, except that the module to
/// symbolize against is identified by its build ID instead of a module name.
Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,
object::SectionedAddress ModuleOffset);
void flush();
static std::string
@ -117,6 +128,12 @@ private:
getOrCreateModuleInfo(const std::string &ModuleName);
Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
/// Returns a SymbolizableModule or an error if loading debug info failed.
/// Unlike the above, errors are reported each time, since they are more
/// likely to be transient.
Expected<SymbolizableModule *>
getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID);
Expected<SymbolizableModule *>
createModuleInfo(const ObjectFile *Obj, std::unique_ptr<DIContext> Context,
StringRef ModuleName);
@ -135,7 +152,8 @@ private:
const std::string &DebuglinkName, uint32_t CRCHash,
std::string &Result);
bool findDebugBinary(const ArrayRef<uint8_t> BuildID, std::string &Result);
bool getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
std::string &Result);
/// Returns pair of pointers to object and debug object.
Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path,
@ -149,6 +167,7 @@ private:
std::map<std::string, std::unique_ptr<SymbolizableModule>, std::less<>>
Modules;
/// Paths already resolved by getOrFindDebugBinary(), keyed by the raw bytes
/// of the build ID they were found for.
StringMap<std::string> BuildIDPaths;
/// Contains cached results of getOrCreateObjectPair().
std::map<std::pair<std::string, std::string>, ObjectPair>

View File

@ -80,6 +80,12 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
return symbolizeCodeCommon(ModuleName, ModuleOffset);
}
/// Build ID flavour of symbolizeCode(): hands \p BuildID and \p ModuleOffset
/// to the shared symbolizeCodeCommon() template and returns its result
/// (line info or error) unchanged.
Expected<DILineInfo>
LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
                              object::SectionedAddress ModuleOffset) {
  Expected<DILineInfo> LineInfoOrErr =
      symbolizeCodeCommon(BuildID, ModuleOffset);
  return LineInfoOrErr;
}
template <typename T>
Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
@ -123,6 +129,12 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
}
/// Build ID flavour of symbolizeInlinedCode(): hands \p BuildID and
/// \p ModuleOffset to the shared symbolizeInlinedCodeCommon() template and
/// returns its result (inlining info or error) unchanged.
Expected<DIInliningInfo>
LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
                                     object::SectionedAddress ModuleOffset) {
  Expected<DIInliningInfo> InlinedOrErr =
      symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
  return InlinedOrErr;
}
template <typename T>
Expected<DIGlobal>
LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
@ -162,6 +174,12 @@ LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
return symbolizeDataCommon(ModuleName, ModuleOffset);
}
/// Build ID flavour of symbolizeData(): hands \p BuildID and \p ModuleOffset
/// to the shared symbolizeDataCommon() template and returns its result
/// (global info or error) unchanged.
Expected<DIGlobal>
LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
                              object::SectionedAddress ModuleOffset) {
  Expected<DIGlobal> GlobalOrErr = symbolizeDataCommon(BuildID, ModuleOffset);
  return GlobalOrErr;
}
template <typename T>
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
@ -197,11 +215,18 @@ LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
return symbolizeFrameCommon(ModuleName, ModuleOffset);
}
/// Build ID flavour of symbolizeFrame(): hands \p BuildID and \p ModuleOffset
/// to the shared symbolizeFrameCommon() template and returns its result
/// (the frame's locals or error) unchanged.
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
                               object::SectionedAddress ModuleOffset) {
  Expected<std::vector<DILocal>> LocalsOrErr =
      symbolizeFrameCommon(BuildID, ModuleOffset);
  return LocalsOrErr;
}
/// Clears the symbolizer's cached objects, binaries, object pairs, modules
/// and build ID paths.
void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
BinaryForPath.clear();
ObjectPairForPathArch.clear();
Modules.clear();
// Also forget the paths previously recorded for build IDs.
BuildIDPaths.clear();
}
namespace {
@ -367,7 +392,7 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
if (BuildID->size() < 2)
return nullptr;
std::string DebugBinaryPath;
if (!findDebugBinary(*BuildID, DebugBinaryPath))
if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath))
return nullptr;
auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
if (!DbgObjOrErr) {
@ -421,12 +446,29 @@ bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
return false;
}
bool LLVMSymbolizer::findDebugBinary(const ArrayRef<uint8_t> BuildID,
std::string &Result) {
/// Reinterprets the bytes of \p BuildID as a StringRef of the same length.
/// The result refers to the same bytes as \p BuildID; it is what the build ID
/// path cache uses as its key.
static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
  const char *Bytes = reinterpret_cast<const char *>(BuildID.data());
  return StringRef(Bytes, BuildID.size());
}
bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
std::string &Result) {
StringRef BuildIDStr = getBuildIDStr(BuildID);
auto I = BuildIDPaths.find(BuildIDStr);
if (I != BuildIDPaths.end()) {
Result = I->second;
return true;
}
// Publishes a successful lookup: returns the path to the caller through
// Result and remembers it in BuildIDPaths under this build ID.
auto recordPath = [&](StringRef Path) {
  Result = Path.str();
  auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
  // The cache was consulted (and missed) above, so the key must be new.
  assert(InsertResult.second);
  // InsertResult is only read by the assert; keep builds without assertions
  // free of an unused-variable warning.
  (void)InsertResult;
};
Optional<std::string> Path;
Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID);
if (Path) {
Result = std::move(*Path);
recordPath(*Path);
return true;
}
@ -434,7 +476,7 @@ bool LLVMSymbolizer::findDebugBinary(const ArrayRef<uint8_t> BuildID,
for (const std::unique_ptr<DIFetcher> &Fetcher : DIFetchers) {
Path = Fetcher->fetchBuildID(BuildID);
if (Path) {
Result = std::move(*Path);
recordPath(*Path);
return true;
}
}
@ -597,6 +639,17 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
return createModuleInfo(&Obj, std::move(Context), ObjName);
}
/// Resolves \p BuildID to a path via getOrFindDebugBinary() and then defers
/// to the path-based getOrCreateModuleInfo() overload.
///
/// \returns the module info produced for the resolved path, or a
/// no_such_file_or_directory error naming the (hex-encoded) build ID when no
/// path could be found for it.
Expected<SymbolizableModule *>
LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
  std::string ResolvedPath;
  const bool Found = getOrFindDebugBinary(BuildID, ResolvedPath);
  if (Found)
    return getOrCreateModuleInfo(ResolvedPath);

  return createStringError(errc::no_such_file_or_directory,
                           Twine("could not find build ID '") +
                               toHex(BuildID) + "'");
}
namespace {
// Undo these various manglings for Win32 extern "C" functions:

View File

@ -0,0 +1,3 @@
RUN: not llvm-symbolizer --build-id=not_a_hex_string 0x1234 2>&1 | FileCheck %s
CHECK: --build-id=: expected a build ID, but got 'not_a_hex_string'

View File

@ -0,0 +1,3 @@
RUN: not llvm-symbolizer --build-id=abc --obj=bad 0x1234 2>&1 | FileCheck %s
CHECK: error: cannot specify both --build-id and --obj

View File

@ -0,0 +1,10 @@
RUN: llvm-symbolizer --build-id=abad 0x1234 0x5678 > %t.stdout 2> %t.stderr
RUN: FileCheck %s --check-prefix=STDOUT < %t.stdout
RUN: FileCheck %s --check-prefix=STDERR < %t.stderr
STDOUT: ??
STDOUT: ??:0:0
STDOUT: ??
STDOUT: ??:0:0
STDERR-COUNT-2: LLVMSymbolizer: error reading file: could not find build ID 'ABAD'

View File

@ -25,3 +25,8 @@ RUN: %t/llvmcache-9800707741016212219
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
RUN: --obj=%t/addr.exe 0x40054d | FileCheck %s --check-prefix=FOUND
FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0
# This should also work if the build ID is provided.
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
RUN: --build-id=127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d | \
RUN: FileCheck %s --check-prefix=FOUND

View File

@ -21,6 +21,7 @@ defm adjust_vma
: Eq<"adjust-vma", "Add specified offset to object file addresses">,
MetaVarName<"<offset>">;
def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">;
defm build_id : Eq<"build-id", "Build ID used to look up the object file">;
defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"<dir>">;
defm default_arch
: Eq<"default-arch", "Default architecture (for multi-arch objects)">,

View File

@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "Opts.inc"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
@ -103,8 +104,8 @@ enum class Command {
Frame,
};
static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
static bool parseCommand(StringRef BinaryName, ArrayRef<uint8_t> BuildID,
bool IsAddr2Line, StringRef InputString, Command &Cmd,
std::string &ModuleName, uint64_t &ModuleOffset) {
const char kDelimiters[] = " \n\r";
ModuleName = "";
@ -120,7 +121,7 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
}
const char *Pos = InputString.data();
// Skip delimiters and parse input filename (if needed).
if (BinaryName.empty()) {
if (BinaryName.empty() && BuildID.empty()) {
Pos += strspn(Pos, kDelimiters);
if (*Pos == '"' || *Pos == '\'') {
char Quote = *Pos;
@ -149,31 +150,24 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
}
static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
bool IsAddr2Line, OutputStyle Style,
StringRef InputString, LLVMSymbolizer &Symbolizer,
DIPrinter &Printer) {
Command Cmd;
std::string ModuleName;
uint64_t Offset = 0;
if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
StringRef(InputString), Cmd, ModuleName, Offset)) {
Printer.printInvalidCommand({ModuleName, None}, InputString);
return;
}
template <typename T>
void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
OutputStyle Style, LLVMSymbolizer &Symbolizer,
DIPrinter &Printer) {
uint64_t AdjustedOffset = Offset - AdjustVMA;
object::SectionedAddress Address = {AdjustedOffset,
object::SectionedAddress::UndefSection};
if (Cmd == Command::Data) {
Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(
ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
} else if (Cmd == Command::Frame) {
Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(
ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
Expected<std::vector<DILocal>> ResOrErr =
Symbolizer.symbolizeFrame(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
} else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
} else if (ShouldInline) {
Expected<DIInliningInfo> ResOrErr =
Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
} else if (Style == OutputStyle::GNU) {
// With PrintFunctions == FunctionNameKind::LinkageName (default)
@ -182,8 +176,8 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
// caller function in the inlining chain. This contradicts the existing
// behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
// the topmost function, which suits our needs better.
Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
Expected<DIInliningInfo> ResOrErr =
Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
Expected<DILineInfo> Res0OrErr =
!ResOrErr
? Expected<DILineInfo>(ResOrErr.takeError())
@ -191,12 +185,37 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
: ResOrErr->getFrame(0));
print({ModuleName, Offset}, Res0OrErr, Printer);
} else {
Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
Expected<DILineInfo> ResOrErr =
Symbolizer.symbolizeCode(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
}
}
/// Handles one input string: parses it into a command, module name and
/// offset, then runs the command through executeCommand().
///
/// If parsing fails, the input is reported through
/// Printer.printInvalidCommand() and nothing is symbolized. When \p BuildID is
/// non-empty the module is specified by the build ID (and labelled with its
/// hex form); otherwise the parsed module name is used for both purposes.
static void symbolizeInput(const opt::InputArgList &Args,
                           ArrayRef<uint8_t> BuildID, uint64_t AdjustVMA,
                           bool IsAddr2Line, OutputStyle Style,
                           StringRef InputString, LLVMSymbolizer &Symbolizer,
                           DIPrinter &Printer) {
  Command Cmd;
  std::string ModuleName;
  uint64_t Offset = 0;
  const bool Parsed =
      parseCommand(Args.getLastArgValue(OPT_obj_EQ), BuildID, IsAddr2Line,
                   StringRef(InputString), Cmd, ModuleName, Offset);
  if (!Parsed) {
    Printer.printInvalidCommand({ModuleName, None}, InputString);
    return;
  }

  const bool ShouldInline =
      Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);

  // No build ID on the command line: symbolize against the parsed module name.
  if (BuildID.empty()) {
    executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
                   Style, Symbolizer, Printer);
    return;
  }

  // With a build ID, parseCommand() does not read a module name from the
  // input, so the hex-encoded build ID stands in as the printed module name.
  assert(ModuleName.empty());
  std::string BuildIDStr = toHex(BuildID);
  executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
                 Style, Symbolizer, Printer);
}
static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
raw_ostream &OS) {
const char HelpText[] = " [options] addresses...";
@ -261,6 +280,22 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
}
/// Decodes the hexadecimal build ID given by the last occurrence of option
/// \p ID in \p Args.
///
/// \returns the decoded bytes, or an empty vector when the option was not
/// given at all.
///
/// If the option is present but its value is empty or is rejected by
/// tryGetFromHex(), a diagnostic is written to errs() and the process exits
/// with status 1.
static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args,
                                            int ID) {
  const opt::Arg *A = Args.getLastArg(ID);
  if (!A)
    return {};

  StringRef V(A->getValue());
  std::string Bytes;
  // An empty value would decode to zero bytes, which callers cannot tell apart
  // from "no build ID given"; treat it as malformed instead of silently
  // ignoring the option.
  if (!tryGetFromHex(V, Bytes) || Bytes.empty()) {
    errs() << A->getSpelling() + ": expected a build ID, but got '" + V +
                  "'\n";
    exit(1);
  }
  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
                            Bytes.size());
  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
@ -328,6 +363,12 @@ int main(int argc, char **argv) {
Style = OutputStyle::LLVM;
}
if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
errs() << "error: cannot specify both --build-id and --obj\n";
return EXIT_FAILURE;
}
SmallVector<uint8_t> BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
LLVMSymbolizer Symbolizer(Opts);
// Look up symbols using the debuginfod client.
@ -353,15 +394,15 @@ int main(int argc, char **argv) {
std::string StrippedInputString(InputString);
llvm::erase_if(StrippedInputString,
[](char c) { return c == '\r' || c == '\n'; });
symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString,
Symbolizer, *Printer);
symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
StrippedInputString, Symbolizer, *Printer);
outs().flush();
}
} else {
Printer->listBegin();
for (StringRef Address : InputAddresses)
symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer,
*Printer);
symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
Symbolizer, *Printer);
Printer->listEnd();
}