llvm-project/llvm/lib/Support/PrettyStackTrace.cpp

211 lines
6.4 KiB
C++
Raw Normal View History

//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
// Unix signals occurring while your program is running.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm-c/ErrorHandling.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdarg>
#include <cstdio>
#include <tuple>
#ifdef HAVE_CRASHREPORTERCLIENT_H
#include <CrashReporterClient.h>
#endif
using namespace llvm;
// If backtrace support is not enabled, compile out support for pretty stack
// traces. This has the secondary effect of not requiring thread local storage
// when backtrace support is disabled.
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
[LPM] Rip all of ManagedStatic and ThreadLocal out of the pretty stack tracing code. Managed static was just insane overhead for this. We took memory fences and external function calls in every path that pushed a pretty stack frame. This includes a multitude of layers setting up and tearing down passes, the parser in Clang, everywhere. For the regression test suite or low-overhead JITs, this was contributing to really significant overhead. Even the LLVM ThreadLocal is really overkill here because it uses pthread_{set,get}_specific logic, and has careful code to both allocate and delete the thread local data. We don't actually want any of that, and this code in particular has problems coping with deallocation. What we want is a single TLS pointer that is valid to use during global construction and during global destruction, any time we want. That is exactly what every host compiler and OS we use has implemented for a long time, and what was standardized in C++11. Even though not all of our host compilers support the thread_local keyword, we can directly use the platform-specific keywords to get the minimal functionality needed. Provided this limited trial survives the build bots, I will move this to Compiler.h so it is more widely available as a light weight if limited alternative to the ThreadLocal class. Many thanks to David Majnemer for helping me think through the implications across platforms and craft the MSVC-compatible syntax. The end result is *substantially* faster. When running llc in a tight loop over a small IR file targeting the aarch64 backend, this improves its performance by over 10% for me. It also seems likely to fix the remaining regressions seen by JIT users with threading enabled. This may actually have more impact on real-world compile times due to the use of the pretty stack tracing utility throughout the rest of Clang or LLVM, but I've not collected any detailed measurements. llvm-svn: 227300
2015-01-28 17:52:14 +08:00
// We need a thread local pointer to manage the stack of our stack trace
// objects, but we *really* cannot tolerate destructors running and do not want
// to pay any overhead of synchronizing. As a consequence, we use a raw
// thread-local variable.
static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr;
namespace llvm {
PrettyStackTraceEntry *ReverseStackTrace(PrettyStackTraceEntry *Head) {
PrettyStackTraceEntry *Prev = nullptr;
while (Head)
std::tie(Prev, Head, Head->NextEntry) =
std::make_tuple(Head, Head->NextEntry, Prev);
return Prev;
}
}
static void PrintStack(raw_ostream &OS) {
// Print out the stack in reverse order. To avoid recursion (which is likely
// to fail if we crashed due to stack overflow), we do an up-front pass to
// reverse the stack, then print it, then reverse it again.
unsigned ID = 0;
PrettyStackTraceEntry *ReversedStack =
llvm::ReverseStackTrace(PrettyStackTraceHead);
for (const PrettyStackTraceEntry *Entry = ReversedStack; Entry;
Entry = Entry->getNextEntry()) {
OS << ID++ << ".\t";
sys::Watchdog W(5);
Entry->print(OS);
}
llvm::ReverseStackTrace(ReversedStack);
}
/// PrintCurStackTrace - Print the current stack trace to the specified stream.
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
[LPM] Rip all of ManagedStatic and ThreadLocal out of the pretty stack tracing code. Managed static was just insane overhead for this. We took memory fences and external function calls in every path that pushed a pretty stack frame. This includes a multitude of layers setting up and tearing down passes, the parser in Clang, everywhere. For the regression test suite or low-overhead JITs, this was contributing to really significant overhead. Even the LLVM ThreadLocal is really overkill here because it uses pthread_{set,get}_specific logic, and has careful code to both allocate and delete the thread local data. We don't actually want any of that, and this code in particular has problems coping with deallocation. What we want is a single TLS pointer that is valid to use during global construction and during global destruction, any time we want. That is exactly what every host compiler and OS we use has implemented for a long time, and what was standardized in C++11. Even though not all of our host compilers support the thread_local keyword, we can directly use the platform-specific keywords to get the minimal functionality needed. Provided this limited trial survives the build bots, I will move this to Compiler.h so it is more widely available as a light weight if limited alternative to the ThreadLocal class. Many thanks to David Majnemer for helping me think through the implications across platforms and craft the MSVC-compatible syntax. The end result is *substantially* faster. When running llc in a tight loop over a small IR file targeting the aarch64 backend, this improves its performance by over 10% for me. It also seems likely to fix the remaining regressions seen by JIT users with threading enabled. This may actually have more impact on real-world compile times due to the use of the pretty stack tracing utility throughout the rest of Clang or LLVM, but I've not collected any detailed measurements. llvm-svn: 227300
2015-01-28 17:52:14 +08:00
if (!PrettyStackTraceHead) return;
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
PrintStack(OS);
OS.flush();
}
// Integrate with crash reporter libraries.
#if defined (__APPLE__) && defined(HAVE_CRASHREPORTERCLIENT_H)
// If any clients of llvm try to link to libCrashReporterClient.a themselves,
// only one crash info struct will be used.
2010-06-29 02:33:48 +08:00
extern "C" {
CRASH_REPORTER_CLIENT_HIDDEN
struct crashreporter_annotations_t gCRAnnotations
__attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
#if CRASHREPORTER_ANNOTATIONS_VERSION < 5
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
#else
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0, 0 };
#endif
2010-06-29 02:33:48 +08:00
}
#elif defined(__APPLE__) && HAVE_CRASHREPORTER_INFO
extern "C" const char *__crashreporter_info__
__attribute__((visibility("hidden"))) = 0;
asm(".desc ___crashreporter_info__, 0x10");
#endif
/// CrashHandler - This callback is run if a fatal signal is delivered to the
/// process, it prints the pretty stack trace.
static void CrashHandler(void *) {
#ifndef __APPLE__
// On non-apple systems, just emit the crash stack trace to stderr.
PrintCurStackTrace(errs());
#else
// Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
// put it into __crashreporter_info__.
SmallString<2048> TmpStr;
{
raw_svector_ostream Stream(TmpStr);
PrintCurStackTrace(Stream);
}
if (!TmpStr.empty()) {
#ifdef HAVE_CRASHREPORTERCLIENT_H
// Cast to void to avoid warning.
(void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
#elif HAVE_CRASHREPORTER_INFO
__crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
#endif
errs() << TmpStr.str();
}
#endif
}
// defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
#endif
PrettyStackTraceEntry::PrettyStackTraceEntry() {
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
// Link ourselves.
[LPM] Rip all of ManagedStatic and ThreadLocal out of the pretty stack tracing code. Managed static was just insane overhead for this. We took memory fences and external function calls in every path that pushed a pretty stack frame. This includes a multitude of layers setting up and tearing down passes, the parser in Clang, everywhere. For the regression test suite or low-overhead JITs, this was contributing to really significant overhead. Even the LLVM ThreadLocal is really overkill here because it uses pthread_{set,get}_specific logic, and has careful code to both allocate and delete the thread local data. We don't actually want any of that, and this code in particular has problems coping with deallocation. What we want is a single TLS pointer that is valid to use during global construction and during global destruction, any time we want. That is exactly what every host compiler and OS we use has implemented for a long time, and what was standardized in C++11. Even though not all of our host compilers support the thread_local keyword, we can directly use the platform-specific keywords to get the minimal functionality needed. Provided this limited trial survives the build bots, I will move this to Compiler.h so it is more widely available as a light weight if limited alternative to the ThreadLocal class. Many thanks to David Majnemer for helping me think through the implications across platforms and craft the MSVC-compatible syntax. The end result is *substantially* faster. When running llc in a tight loop over a small IR file targeting the aarch64 backend, this improves its performance by over 10% for me. It also seems likely to fix the remaining regressions seen by JIT users with threading enabled. This may actually have more impact on real-world compile times due to the use of the pretty stack tracing utility throughout the rest of Clang or LLVM, but I've not collected any detailed measurements. llvm-svn: 227300
2015-01-28 17:52:14 +08:00
NextEntry = PrettyStackTraceHead;
PrettyStackTraceHead = this;
#endif
}
PrettyStackTraceEntry::~PrettyStackTraceEntry() {
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
[LPM] Rip all of ManagedStatic and ThreadLocal out of the pretty stack tracing code. Managed static was just insane overhead for this. We took memory fences and external function calls in every path that pushed a pretty stack frame. This includes a multitude of layers setting up and tearing down passes, the parser in Clang, everywhere. For the regression test suite or low-overhead JITs, this was contributing to really significant overhead. Even the LLVM ThreadLocal is really overkill here because it uses pthread_{set,get}_specific logic, and has careful code to both allocate and delete the thread local data. We don't actually want any of that, and this code in particular has problems coping with deallocation. What we want is a single TLS pointer that is valid to use during global construction and during global destruction, any time we want. That is exactly what every host compiler and OS we use has implemented for a long time, and what was standardized in C++11. Even though not all of our host compilers support the thread_local keyword, we can directly use the platform-specific keywords to get the minimal functionality needed. Provided this limited trial survives the build bots, I will move this to Compiler.h so it is more widely available as a light weight if limited alternative to the ThreadLocal class. Many thanks to David Majnemer for helping me think through the implications across platforms and craft the MSVC-compatible syntax. The end result is *substantially* faster. When running llc in a tight loop over a small IR file targeting the aarch64 backend, this improves its performance by over 10% for me. It also seems likely to fix the remaining regressions seen by JIT users with threading enabled. This may actually have more impact on real-world compile times due to the use of the pretty stack tracing utility throughout the rest of Clang or LLVM, but I've not collected any detailed measurements. llvm-svn: 227300
2015-01-28 17:52:14 +08:00
assert(PrettyStackTraceHead == this &&
"Pretty stack trace entry destruction is out of order");
PrettyStackTraceHead = NextEntry;
#endif
}
void PrettyStackTraceString::print(raw_ostream &OS) const { OS << Str << "\n"; }
PrettyStackTraceFormat::PrettyStackTraceFormat(const char *Format, ...) {
va_list AP;
va_start(AP, Format);
const int SizeOrError = vsnprintf(nullptr, 0, Format, AP);
va_end(AP);
if (SizeOrError < 0) {
return;
}
const int Size = SizeOrError + 1; // '\0'
Str.resize(Size);
va_start(AP, Format);
vsnprintf(Str.data(), Size, Format, AP);
va_end(AP);
}
void PrettyStackTraceFormat::print(raw_ostream &OS) const { OS << Str << "\n"; }
void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << "Program arguments: ";
// Print the argument list.
for (unsigned i = 0, e = ArgC; i != e; ++i)
OS << ArgV[i] << ' ';
OS << '\n';
}
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
static bool RegisterCrashPrinter() {
sys::AddSignalHandler(CrashHandler, nullptr);
return false;
}
#endif
void llvm::EnablePrettyStackTrace() {
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
// The first time this is called, we register the crash printer.
static bool HandlerRegistered = RegisterCrashPrinter();
(void)HandlerRegistered;
#endif
}
const void *llvm::SavePrettyStackState() {
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
return PrettyStackTraceHead;
#else
return nullptr;
#endif
}
void llvm::RestorePrettyStackState(const void *Top) {
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
PrettyStackTraceHead =
static_cast<PrettyStackTraceEntry *>(const_cast<void *>(Top));
#endif
}
void LLVMEnablePrettyStackTrace() {
EnablePrettyStackTrace();
}