forked from OSchip/llvm-project
[Debuginfod] Add BUILD_ID syntax to llvm-symbolizer.
This adds a BUILDID: prefix to the llvm-symbolizer stdin and argument syntax. The prefix causes the given binary name to be interpreted as a build ID instead of an object file path. The semantics are analogous to the behavior of --obj and --build-id. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D119901
This commit is contained in:
parent
0d058ed3d6
commit
565add5a62
|
@ -11,23 +11,26 @@ SYNOPSIS
|
|||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
:program:`llvm-symbolizer` reads object file names and addresses from the
|
||||
command-line and prints corresponding source code locations to standard output.
|
||||
:program:`llvm-symbolizer` reads input names and addresses from the command-line
|
||||
and prints corresponding source code locations to standard output.
|
||||
|
||||
If no address is specified on the command-line, it reads the addresses from
|
||||
standard input. If no object file is specified on the command-line, but
|
||||
addresses are, or if at any time an input value is not recognized, the input is
|
||||
simply echoed to the output.
|
||||
standard input. If no input name is specified on the command-line, but addresses
|
||||
are, or if at any time an input value is not recognized, the input is simply
|
||||
echoed to the output.
|
||||
|
||||
Input names can be specified together with the addresses either on standard
|
||||
input or as positional arguments on the command-line. By default, input names
|
||||
are interpreted as object file paths. However, prefixing a name with
|
||||
``BUILDID:`` states that it is a hex build ID rather than a path. This will look
|
||||
up the corresponding debug binary. For consistency, prefixing a name with
|
||||
``FILE:`` explicitly states that it is an object file path (the default).
|
||||
|
||||
A positional argument or standard input value can be preceded by "DATA" or
|
||||
"CODE" to indicate that the address should be symbolized as data or executable
|
||||
code respectively. If neither is specified, "CODE" is assumed. DATA is
|
||||
symbolized as address and symbol size rather than line number.
|
||||
|
||||
Object files can be specified together with the addresses either on standard
|
||||
input or as positional arguments on the command-line, following any "DATA" or
|
||||
"CODE" prefix.
|
||||
|
||||
:program:`llvm-symbolizer` parses options from the environment variable
|
||||
``LLVM_SYMBOLIZER_OPTS`` after parsing options from the command line.
|
||||
``LLVM_SYMBOLIZER_OPTS`` is primarily useful for supplementing the command-line
|
||||
|
@ -107,7 +110,7 @@ Example 3 - object specified with address:
|
|||
|
||||
.. code-block:: console
|
||||
|
||||
$ llvm-symbolizer "test.elf 0x400490" "inlined.elf 0x400480"
|
||||
$ llvm-symbolizer "test.elf 0x400490" "FILE:inlined.elf 0x400480"
|
||||
baz()
|
||||
/tmp/test.cpp:11:0
|
||||
|
||||
|
@ -115,7 +118,7 @@ Example 3 - object specified with address:
|
|||
/tmp/test.cpp:8:10
|
||||
|
||||
$ cat addr2.txt
|
||||
test.elf 0x4004a0
|
||||
FILE:test.elf 0x4004a0
|
||||
inlined.elf 0x400480
|
||||
|
||||
$ llvm-symbolizer < addr2.txt
|
||||
|
@ -125,7 +128,29 @@ Example 3 - object specified with address:
|
|||
foo()
|
||||
/tmp/test.cpp:8:10
|
||||
|
||||
Example 4 - CODE and DATA prefixes:
|
||||
Example 4 - BUILDID and FILE prefixes:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ llvm-symbolizer "FILE:test.elf 0x400490" "DATA BUILDID:123456789abcdef 0x601028"
|
||||
baz()
|
||||
/tmp/test.cpp:11:0
|
||||
|
||||
bar
|
||||
6295592 4
|
||||
|
||||
$ cat addr3.txt
|
||||
FILE:test.elf 0x400490
|
||||
DATA BUILDID:123456789abcdef 0x601028
|
||||
|
||||
$ llvm-symbolizer < addr3.txt
|
||||
baz()
|
||||
/tmp/test.cpp:11:0
|
||||
|
||||
bar
|
||||
6295592 4
|
||||
|
||||
Example 5 - CODE and DATA prefixes:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
|
@ -136,18 +161,18 @@ Example 4 - CODE and DATA prefixes:
|
|||
bar
|
||||
6295592 4
|
||||
|
||||
$ cat addr3.txt
|
||||
$ cat addr4.txt
|
||||
CODE test.elf 0x4004a0
|
||||
DATA inlined.elf 0x601028
|
||||
|
||||
$ llvm-symbolizer < addr3.txt
|
||||
$ llvm-symbolizer < addr4.txt
|
||||
main
|
||||
/tmp/test.cpp:15:0
|
||||
|
||||
bar
|
||||
6295592 4
|
||||
|
||||
Example 5 - path-style options:
|
||||
Example 6 - path-style options:
|
||||
|
||||
This example uses the same source file as above, but the source file's
|
||||
full path is /tmp/foo/test.cpp and is compiled as follows. The first case
|
||||
|
|
|
@ -27,11 +27,52 @@ RUN: --obj=%t/addr.exe 0x40054d --debuginfod | \
|
|||
RUN: FileCheck %s --check-prefix=FOUND
|
||||
FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0
|
||||
|
||||
# This should also work if the build ID is provided.
|
||||
# This should also work if the build ID is provided via flag.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: --build-id=127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d | \
|
||||
RUN: FileCheck %s --check-prefix=FOUND
|
||||
|
||||
# This should also work if the build ID is provided as a BUILDID:-prefixed positional argument.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=FOUND
|
||||
|
||||
# Passing BUILDID twice is a syntax error.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: "BUILDID:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=BUILDIDBUILDID
|
||||
BUILDIDBUILDID: BUILDID:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d
|
||||
|
||||
# CODE should work preceding build ID.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: "CODE BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=FOUND
|
||||
|
||||
# The symbolizer shouldn't call the debuginfod library by default with no URLs.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer --print-address \
|
||||
RUN: --obj=%t/addr.exe 0x40054d | FileCheck %s --check-prefix=NOTFOUND
|
||||
|
||||
# The symbolizer shouldn't call the debuginfod library if explicitly disabled.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: --no-debuginfod \
|
||||
RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=NOTHINGFOUND
|
||||
NOTHINGFOUND: ??
|
||||
NOTHINGFOUND-NEXT: ??:0:0
|
||||
|
||||
# BUILDID shouldn't be parsed if --obj is given, just like regular filenames.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: --obj=%t/addr.exe \
|
||||
RUN: "BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=BUILDIDIGNORED
|
||||
BUILDIDIGNORED: BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d
|
||||
|
||||
# Providing both BUILDID and FILE is a syntax error.
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: "BUILDID:FILE:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=BUILDIDFILE
|
||||
BUILDIDFILE: BUILDID:FILE:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d
|
||||
RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \
|
||||
RUN: "FILE:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=FILEBUILDID
|
||||
FILEBUILDID: FILE:BUILDID:127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
# The FILE prefix acts as a no-op, but it provides consistency with BUILDID.
|
||||
RUN: llvm-symbolizer "CODE FILE:%p/Inputs/addr.exe 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=FOUND
|
||||
FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0
|
||||
|
||||
# Passing FILE twice is a syntax error.
|
||||
RUN: llvm-symbolizer "CODE FILE:FILE:%p/Inputs/addr.exe 0x40054d" | \
|
||||
RUN: FileCheck %s --check-prefix=FILEFILE
|
||||
FILEFILE: CODE FILE:FILE:{{.*}}/Inputs/addr.exe 0x40054d
|
|
@ -106,9 +106,31 @@ enum class Command {
|
|||
Frame,
|
||||
};
|
||||
|
||||
static bool parseCommand(StringRef BinaryName, ArrayRef<uint8_t> BuildID,
|
||||
bool IsAddr2Line, StringRef InputString, Command &Cmd,
|
||||
std::string &ModuleName, uint64_t &ModuleOffset) {
|
||||
static void enableDebuginfod(LLVMSymbolizer &Symbolizer) {
|
||||
static bool IsEnabled = false;
|
||||
if (IsEnabled)
|
||||
return;
|
||||
IsEnabled = true;
|
||||
// Look up symbols using the debuginfod client.
|
||||
Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>());
|
||||
// The HTTPClient must be initialized for use by the debuginfod client.
|
||||
HTTPClient::initialize();
|
||||
}
|
||||
|
||||
static SmallVector<uint8_t> parseBuildID(StringRef Str) {
|
||||
std::string Bytes;
|
||||
if (!tryGetFromHex(Str, Bytes))
|
||||
return {};
|
||||
ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
|
||||
Bytes.size());
|
||||
return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
|
||||
}
|
||||
|
||||
static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
|
||||
StringRef InputString, Command &Cmd,
|
||||
std::string &ModuleName,
|
||||
SmallVectorImpl<uint8_t> &BuildID,
|
||||
uint64_t &ModuleOffset) {
|
||||
const char kDelimiters[] = " \n\r";
|
||||
ModuleName = "";
|
||||
if (InputString.consume_front("CODE ")) {
|
||||
|
@ -121,9 +143,31 @@ static bool parseCommand(StringRef BinaryName, ArrayRef<uint8_t> BuildID,
|
|||
// If no cmd, assume it's CODE.
|
||||
Cmd = Command::Code;
|
||||
}
|
||||
const char *Pos = InputString.data();
|
||||
|
||||
const char *Pos;
|
||||
// Skip delimiters and parse input filename (if needed).
|
||||
if (BinaryName.empty() && BuildID.empty()) {
|
||||
bool HasFilePrefix = false;
|
||||
bool HasBuildIDPrefix = false;
|
||||
while (true) {
|
||||
if (InputString.consume_front("FILE:")) {
|
||||
if (HasFilePrefix)
|
||||
return false;
|
||||
HasFilePrefix = true;
|
||||
continue;
|
||||
}
|
||||
if (InputString.consume_front("BUILDID:")) {
|
||||
if (HasBuildIDPrefix)
|
||||
return false;
|
||||
HasBuildIDPrefix = true;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (HasFilePrefix && HasBuildIDPrefix)
|
||||
return false;
|
||||
|
||||
Pos = InputString.data();
|
||||
Pos += strspn(Pos, kDelimiters);
|
||||
if (*Pos == '"' || *Pos == '\'') {
|
||||
char Quote = *Pos;
|
||||
|
@ -138,7 +182,14 @@ static bool parseCommand(StringRef BinaryName, ArrayRef<uint8_t> BuildID,
|
|||
ModuleName = std::string(Pos, NameLength);
|
||||
Pos += NameLength;
|
||||
}
|
||||
if (HasBuildIDPrefix) {
|
||||
BuildID = parseBuildID(ModuleName);
|
||||
if (BuildID.empty())
|
||||
return false;
|
||||
ModuleName.clear();
|
||||
}
|
||||
} else {
|
||||
Pos = InputString.data();
|
||||
ModuleName = BinaryName.str();
|
||||
}
|
||||
// Skip delimiters and parse module offset.
|
||||
|
@ -195,21 +246,24 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
|
|||
}
|
||||
|
||||
static void symbolizeInput(const opt::InputArgList &Args,
|
||||
ArrayRef<uint8_t> BuildID, uint64_t AdjustVMA,
|
||||
bool IsAddr2Line, OutputStyle Style,
|
||||
StringRef InputString, LLVMSymbolizer &Symbolizer,
|
||||
DIPrinter &Printer) {
|
||||
ArrayRef<uint8_t> IncomingBuildID,
|
||||
uint64_t AdjustVMA, bool IsAddr2Line,
|
||||
OutputStyle Style, StringRef InputString,
|
||||
LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
|
||||
Command Cmd;
|
||||
std::string ModuleName;
|
||||
SmallVector<uint8_t> BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
|
||||
uint64_t Offset = 0;
|
||||
if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), BuildID, IsAddr2Line,
|
||||
StringRef(InputString), Cmd, ModuleName, Offset)) {
|
||||
if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
|
||||
StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
|
||||
Printer.printInvalidCommand({ModuleName, None}, InputString);
|
||||
return;
|
||||
}
|
||||
bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
|
||||
if (!BuildID.empty()) {
|
||||
assert(ModuleName.empty());
|
||||
if (!Args.hasArg(OPT_no_debuginfod))
|
||||
enableDebuginfod(Symbolizer);
|
||||
std::string BuildIDStr = toHex(BuildID);
|
||||
executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
|
||||
Style, Symbolizer, Printer);
|
||||
|
@ -283,43 +337,23 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
|
|||
return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
|
||||
}
|
||||
|
||||
SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args, int ID) {
|
||||
if (const opt::Arg *A = Args.getLastArg(ID)) {
|
||||
StringRef V(A->getValue());
|
||||
std::string Bytes;
|
||||
if (!tryGetFromHex(V, Bytes)) {
|
||||
errs() << A->getSpelling() + ": expected a build ID, but got '" + V +
|
||||
"'\n";
|
||||
exit(1);
|
||||
}
|
||||
ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
|
||||
Bytes.size());
|
||||
return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
|
||||
static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args,
|
||||
int ID) {
|
||||
const opt::Arg *A = Args.getLastArg(ID);
|
||||
if (!A)
|
||||
return {};
|
||||
|
||||
StringRef V(A->getValue());
|
||||
SmallVector<uint8_t> BuildID = parseBuildID(V);
|
||||
if (BuildID.empty()) {
|
||||
errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
|
||||
exit(1);
|
||||
}
|
||||
return {};
|
||||
return BuildID;
|
||||
}
|
||||
|
||||
ExitOnError ExitOnErr;
|
||||
|
||||
static bool shouldUseDebuginfodByDefault(ArrayRef<uint8_t> BuildID) {
|
||||
// If the user explicitly specified a build ID, the usual way to find it is
|
||||
// debuginfod.
|
||||
if (!BuildID.empty())
|
||||
return true;
|
||||
|
||||
// A debuginfod lookup could succeed if a HTTP client is available and at
|
||||
// least one backing URL is configured.
|
||||
if (HTTPClient::isAvailable() &&
|
||||
!ExitOnErr(getDefaultDebuginfodUrls()).empty())
|
||||
return true;
|
||||
|
||||
// A debuginfod lookup could also succeed if something were present in the
|
||||
// cache directory, but it would be surprising to enable debuginfod on this
|
||||
// basis alone. To use existing caches in an "offline" fashion, the debuginfod
|
||||
// flag must be set.
|
||||
return false;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
InitLLVM X(argc, argv);
|
||||
sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
|
||||
|
@ -397,13 +431,14 @@ int main(int argc, char **argv) {
|
|||
|
||||
LLVMSymbolizer Symbolizer(Opts);
|
||||
|
||||
// A debuginfod lookup could succeed if a HTTP client is available and at
|
||||
// least one backing URL is configured.
|
||||
bool ShouldUseDebuginfodByDefault =
|
||||
HTTPClient::isAvailable() &&
|
||||
!ExitOnErr(getDefaultDebuginfodUrls()).empty();
|
||||
if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod,
|
||||
shouldUseDebuginfodByDefault(BuildID))) {
|
||||
// Look up symbols using the debuginfod client.
|
||||
Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>());
|
||||
// The HTTPClient must be initialized for use by the debuginfod client.
|
||||
HTTPClient::initialize();
|
||||
}
|
||||
ShouldUseDebuginfodByDefault))
|
||||
enableDebuginfod(Symbolizer);
|
||||
|
||||
std::unique_ptr<DIPrinter> Printer;
|
||||
if (Style == OutputStyle::GNU)
|
||||
|
|
Loading…
Reference in New Issue