2010-07-28 23:40:20 +08:00
|
|
|
//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2010-07-28 23:40:20 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Support/CrashRecoveryContext.h"
|
2018-04-30 22:59:11 +08:00
|
|
|
#include "llvm/Config/llvm-config.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2020-05-20 17:10:31 +08:00
|
|
|
#include "llvm/Support/ExitCodes.h"
|
2013-09-13 01:46:57 +08:00
|
|
|
#include "llvm/Support/ManagedStatic.h"
|
2020-01-12 04:27:07 +08:00
|
|
|
#include "llvm/Support/Signals.h"
|
2010-11-30 02:16:10 +08:00
|
|
|
#include "llvm/Support/ThreadLocal.h"
|
2021-05-26 18:25:11 +08:00
|
|
|
#include "llvm/Support/thread.h"
|
2019-08-20 03:49:57 +08:00
|
|
|
#include <mutex>
|
2016-04-06 04:45:04 +08:00
|
|
|
#include <setjmp.h>
|
2020-01-12 04:27:07 +08:00
|
|
|
|
2010-07-28 23:40:20 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
struct CrashRecoveryContextImpl;
|
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
static ManagedStatic<
|
2014-06-11 02:03:04 +08:00
|
|
|
sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2010-07-28 23:40:20 +08:00
|
|
|
struct CrashRecoveryContextImpl {
|
2015-08-08 01:32:06 +08:00
|
|
|
// When threads are disabled, this links up all active
|
|
|
|
// CrashRecoveryContextImpls. When threads are enabled there's one thread
|
|
|
|
// per CrashRecoveryContext and CurrentContext is a thread-local, so only one
|
|
|
|
// CrashRecoveryContextImpl is active per thread and this is always null.
|
2015-08-07 03:21:25 +08:00
|
|
|
const CrashRecoveryContextImpl *Next;
|
|
|
|
|
2010-08-18 06:32:37 +08:00
|
|
|
CrashRecoveryContext *CRC;
|
2010-07-28 23:40:20 +08:00
|
|
|
::jmp_buf JumpBuffer;
|
|
|
|
volatile unsigned Failed : 1;
|
2013-06-20 06:53:45 +08:00
|
|
|
unsigned SwitchedThread : 1;
|
2020-02-07 08:23:27 +08:00
|
|
|
unsigned ValidJumpBuffer : 1;
|
2010-07-28 23:40:20 +08:00
|
|
|
|
|
|
|
public:
|
2020-02-12 07:50:27 +08:00
|
|
|
CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept
|
2020-02-07 08:23:27 +08:00
|
|
|
: CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) {
|
2015-08-07 03:21:25 +08:00
|
|
|
Next = CurrentContext->get();
|
2013-09-13 01:46:57 +08:00
|
|
|
CurrentContext->set(this);
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
|
|
|
~CrashRecoveryContextImpl() {
|
2013-06-20 06:53:45 +08:00
|
|
|
if (!SwitchedThread)
|
2015-08-07 03:21:25 +08:00
|
|
|
CurrentContext->set(Next);
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
2010-07-28 23:40:20 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Called when the separate crash-recovery thread was finished, to
|
2013-06-20 06:53:45 +08:00
|
|
|
/// indicate that we don't need to clear the thread-local CurrentContext.
|
2018-07-31 03:41:25 +08:00
|
|
|
void setSwitchedThread() {
|
2015-08-07 03:21:25 +08:00
|
|
|
#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
|
|
|
|
SwitchedThread = true;
|
|
|
|
#endif
|
|
|
|
}
|
2013-06-20 06:53:45 +08:00
|
|
|
|
2020-01-12 04:27:07 +08:00
|
|
|
// If the function ran by the CrashRecoveryContext crashes or fails, then
|
|
|
|
// 'RetCode' represents the returned error code, as if it was returned by a
|
|
|
|
// process. 'Context' represents the signal type on Unix; on Windows, it is
|
|
|
|
// the ExceptionContext.
|
|
|
|
void HandleCrash(int RetCode, uintptr_t Context) {
|
2010-08-18 06:32:39 +08:00
|
|
|
// Eliminate the current context entry, to avoid re-entering in case the
|
|
|
|
// cleanup code crashes.
|
2015-08-07 03:21:25 +08:00
|
|
|
CurrentContext->set(Next);
|
2010-08-18 06:32:39 +08:00
|
|
|
|
2010-07-28 23:40:20 +08:00
|
|
|
assert(!Failed && "Crash recovery context already failed!");
|
|
|
|
Failed = true;
|
|
|
|
|
2020-01-12 04:27:07 +08:00
|
|
|
if (CRC->DumpStackAndCleanupOnFailure)
|
|
|
|
sys::CleanupOnSignal(Context);
|
|
|
|
|
|
|
|
CRC->RetCode = RetCode;
|
2010-07-28 23:40:20 +08:00
|
|
|
|
|
|
|
// Jump back to the RunSafely we were called under.
|
2020-02-07 08:23:27 +08:00
|
|
|
if (ValidJumpBuffer)
|
|
|
|
longjmp(JumpBuffer, 1);
|
|
|
|
|
|
|
|
// Otherwise let the caller decide of the outcome of the crash. Currently
|
|
|
|
// this occurs when using SEH on Windows with MSVC or clang-cl.
|
2010-07-28 23:40:20 +08:00
|
|
|
}
|
|
|
|
};
|
2021-01-28 02:37:19 +08:00
|
|
|
} // namespace
|
2010-07-28 23:40:20 +08:00
|
|
|
|
2021-02-11 05:36:10 +08:00
|
|
|
// Platform-specific hooks, defined later in this file and invoked by
// CrashRecoveryContext::Enable() / Disable().
static void installExceptionOrSignalHandlers();
static void uninstallExceptionOrSignalHandlers();

// Held by CrashRecoveryContext::Enable() and Disable() while they update
// gCrashRecoveryEnabled.
static ManagedStatic<std::mutex> gCrashRecoveryContextMutex;

// True between a call to Enable() and the next call to Disable().
static bool gCrashRecoveryEnabled = false;

// Per-thread pointer to the CrashRecoveryContext whose destructor is currently
// running its registered cleanups; null otherwise.
static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
    tlIsRecoveringFromCrash;
|
|
|
|
|
2011-03-18 10:05:11 +08:00
|
|
|
// Out-of-line destructor; nothing to release at this level.
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() = default;
|
|
|
|
|
2020-09-23 06:22:01 +08:00
|
|
|
CrashRecoveryContext::CrashRecoveryContext() {
  // If abort() was triggered earlier on Windows (and caught by a previous
  // CrashRecoveryContext), the Windows CRT removes our installed signal
  // handler, so it has to be installed again for every new context.
  sys::DisableSystemDialogsOnCrash();
}
|
|
|
|
|
2010-07-28 23:40:20 +08:00
|
|
|
CrashRecoveryContext::~CrashRecoveryContext() {
  // Flag this thread as recovering while the registered cleanups run, and
  // remember the previous value so it can be put back afterwards.
  const CrashRecoveryContext *Previous = tlIsRecoveringFromCrash->get();
  tlIsRecoveringFromCrash->set(this);

  // Reclaim registered resources, walking the list from its head.
  for (CrashRecoveryContextCleanup *Cursor = head; Cursor;) {
    CrashRecoveryContextCleanup *Current = Cursor;
    // Step to the successor before the current node is deleted.
    Cursor = Current->next;
    Current->cleanupFired = true;
    Current->recoverResources();
    delete Current;
  }

  tlIsRecoveringFromCrash->set(Previous);

  // Release the implementation object, if RunSafely created one.
  delete static_cast<CrashRecoveryContextImpl *>(Impl);
}
|
|
|
|
|
2011-03-22 02:38:03 +08:00
|
|
|
bool CrashRecoveryContext::isRecoveringFromCrash() {
|
2014-04-07 12:17:22 +08:00
|
|
|
return tlIsRecoveringFromCrash->get() != nullptr;
|
2011-03-22 02:38:03 +08:00
|
|
|
}
|
|
|
|
|
2010-08-18 06:32:37 +08:00
|
|
|
/// Returns the CrashRecoveryContext that is active on the calling thread, or
/// null when crash recovery is disabled or no context is active.
CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
  if (!gCrashRecoveryEnabled)
    return nullptr;

  if (const CrashRecoveryContextImpl *Active = CurrentContext->get())
    return Active->CRC;

  return nullptr;
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
void CrashRecoveryContext::Enable() {
|
2021-02-11 05:36:10 +08:00
|
|
|
std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
|
|
|
|
// FIXME: Shouldn't this be a refcount or something?
|
2017-05-18 02:16:17 +08:00
|
|
|
if (gCrashRecoveryEnabled)
|
|
|
|
return;
|
|
|
|
gCrashRecoveryEnabled = true;
|
|
|
|
installExceptionOrSignalHandlers();
|
|
|
|
}
|
|
|
|
|
|
|
|
void CrashRecoveryContext::Disable() {
|
2021-02-11 05:36:10 +08:00
|
|
|
std::lock_guard<std::mutex> L(*gCrashRecoveryContextMutex);
|
2017-05-18 02:16:17 +08:00
|
|
|
if (!gCrashRecoveryEnabled)
|
|
|
|
return;
|
|
|
|
gCrashRecoveryEnabled = false;
|
|
|
|
uninstallExceptionOrSignalHandlers();
|
|
|
|
}
|
|
|
|
|
2011-03-18 10:05:11 +08:00
|
|
|
/// Pushes \p cleanup onto the front of this context's cleanup list. A null
/// pointer is ignored.
void CrashRecoveryContext::registerCleanup(
    CrashRecoveryContextCleanup *cleanup) {
  if (cleanup == nullptr)
    return;

  // Link the new node in front of the current head, then make it the head.
  cleanup->next = head;
  if (head != nullptr)
    head->prev = cleanup;
  head = cleanup;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
|
|
|
|
if (!cleanup)
|
|
|
|
return;
|
|
|
|
if (cleanup == head) {
|
|
|
|
head = cleanup->next;
|
|
|
|
if (head)
|
2014-04-07 12:17:22 +08:00
|
|
|
head->prev = nullptr;
|
2011-03-18 10:05:11 +08:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
cleanup->prev->next = cleanup->next;
|
|
|
|
if (cleanup->next)
|
|
|
|
cleanup->next->prev = cleanup->prev;
|
|
|
|
}
|
|
|
|
delete cleanup;
|
|
|
|
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
#if defined(_MSC_VER)
|
2020-02-11 23:17:15 +08:00
|
|
|
|
|
|
|
#include <windows.h> // for GetExceptionInformation
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
|
|
|
|
// better than VEH. Vectored exception handling catches all exceptions happening
|
|
|
|
// on the thread with installed exception handlers, so it can interfere with
|
|
|
|
// internal exception handling of other libraries on that thread. SEH works
|
|
|
|
// exactly as you would expect normal exception handling to work: it only
|
|
|
|
// catches exceptions if they would bubble out from the stack frame with __try /
|
|
|
|
// __except.
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
// SEH build: there is no global handler to install; RunSafely wraps the
// callback in __try / __except instead.
static void installExceptionOrSignalHandlers() {
}
|
|
|
|
// SEH build: nothing was installed, so there is nothing to remove.
static void uninstallExceptionOrSignalHandlers() {
}
|
2011-08-20 14:35:36 +08:00
|
|
|
|
2020-01-12 04:27:07 +08:00
|
|
|
// We need this function because the call to GetExceptionInformation() can only
|
|
|
|
// occur inside the __except evaluation block
|
2020-02-07 08:23:27 +08:00
|
|
|
static int ExceptionFilter(_EXCEPTION_POINTERS *Except) {
|
|
|
|
// Lookup the current thread local recovery object.
|
|
|
|
const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
|
|
|
|
|
|
|
|
if (!CRCI) {
|
|
|
|
// Something has gone horribly wrong, so let's just tell everyone
|
|
|
|
// to keep searching
|
|
|
|
CrashRecoveryContext::Disable();
|
|
|
|
return EXCEPTION_CONTINUE_SEARCH;
|
|
|
|
}
|
|
|
|
|
|
|
|
int RetCode = (int)Except->ExceptionRecord->ExceptionCode;
|
2020-02-11 23:17:15 +08:00
|
|
|
if ((RetCode & 0xF0000000) == 0xE0000000)
|
|
|
|
RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
|
2020-02-07 08:23:27 +08:00
|
|
|
|
|
|
|
// Handle the crash
|
|
|
|
const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
|
|
|
|
RetCode, reinterpret_cast<uintptr_t>(Except));
|
|
|
|
|
2020-01-12 04:27:07 +08:00
|
|
|
return EXCEPTION_EXECUTE_HANDLER;
|
|
|
|
}
|
|
|
|
|
2020-01-29 23:21:08 +08:00
|
|
|
#if defined(__clang__) && defined(_M_IX86)
|
|
|
|
// Work around PR44697.
|
|
|
|
__attribute__((optnone))
|
|
|
|
#endif
|
2020-01-12 04:27:07 +08:00
|
|
|
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
|
|
|
|
if (!gCrashRecoveryEnabled) {
|
2017-05-18 02:16:17 +08:00
|
|
|
Fn();
|
2020-01-12 04:27:07 +08:00
|
|
|
return true;
|
2017-05-18 02:16:17 +08:00
|
|
|
}
|
2020-02-07 08:23:27 +08:00
|
|
|
assert(!Impl && "Crash recovery context already initialized!");
|
|
|
|
Impl = new CrashRecoveryContextImpl(this);
|
|
|
|
__try {
|
|
|
|
Fn();
|
|
|
|
} __except (ExceptionFilter(GetExceptionInformation())) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
2017-05-18 02:16:17 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#else // !_MSC_VER
|
|
|
|
|
2018-04-29 08:45:03 +08:00
|
|
|
#if defined(_WIN32)
|
2017-05-18 02:16:17 +08:00
|
|
|
// This is a non-MSVC compiler, probably mingw gcc or clang without
|
|
|
|
// -fms-extensions. Use vectored exception handling (VEH).
|
|
|
|
//
|
|
|
|
// On Windows, we can make use of vectored exception handling to catch most
|
|
|
|
// crashing situations. Note that this does mean we will be alerted of
|
|
|
|
// exceptions *before* structured exception handling has the opportunity to
|
|
|
|
// catch it. Unfortunately, this causes problems in practice with other code
|
|
|
|
// running on threads with LLVM crash recovery contexts, so we would like to
|
|
|
|
// eventually move away from VEH.
|
2011-08-20 14:35:36 +08:00
|
|
|
//
|
2017-05-18 02:16:17 +08:00
|
|
|
// Vectored works on a per-thread basis, which is an advantage over
|
|
|
|
// SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have
|
|
|
|
// any native support for chaining exception handlers, but VEH allows more than
|
|
|
|
// one.
|
2011-08-20 14:35:36 +08:00
|
|
|
//
|
|
|
|
// The vectored exception handler functionality was added in Windows
|
|
|
|
// XP, so if support for older versions of Windows is required,
|
|
|
|
// it will have to be added.
|
2017-05-18 02:16:17 +08:00
|
|
|
|
2020-02-28 16:59:24 +08:00
|
|
|
#include "llvm/Support/Windows/WindowsSupport.h"
|
2011-08-20 14:35:36 +08:00
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
// Vectored exception handler installed by installExceptionOrSignalHandlers().
// Debugger-notification exceptions are allowed to continue; anything else is
// reported to the recovery context that is current for this thread.
static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
  // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported
  // compilers and platforms, so it is spelled out here.
  constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL;

  switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
  case DBG_PRINTEXCEPTION_C:
  case DbgPrintExceptionWideC:
  case 0x406D1388: // set debugger thread name
    return EXCEPTION_CONTINUE_EXECUTION;
  }

  // Find the recovery object that is current for this thread.
  const CrashRecoveryContextImpl *Active = CurrentContext->get();
  if (Active == nullptr) {
    // Something has gone horribly wrong, so let's just tell everyone to keep
    // searching.
    CrashRecoveryContext::Disable();
    return EXCEPTION_CONTINUE_SEARCH;
  }

  // TODO: We can capture the stack backtrace here and store it on the
  // implementation if we so choose.

  int RetCode = static_cast<int>(ExceptionInfo->ExceptionRecord->ExceptionCode);
  // A code whose top nibble is 0xE was generated by sys::Process::Exit; strip
  // the marker to recover the plain exit code.
  if ((RetCode & 0xF0000000) == 0xE0000000)
    RetCode &= ~0xF0000000;

  // Record the crash on the active context.
  const_cast<CrashRecoveryContextImpl *>(Active)->HandleCrash(
      RetCode, reinterpret_cast<uintptr_t>(ExceptionInfo));

  // Control does not come back here: HandleCrash calls longjmp and therefore
  // never returns.
  llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
}
|
|
|
|
|
|
|
|
// Because the Enable and Disable calls are static, it means that
|
|
|
|
// there may not actually be an Impl available, or even a current
|
|
|
|
// CrashRecoveryContext at all. So we make use of a thread-local
|
|
|
|
// exception table. The handles contained in here will either be
|
|
|
|
// non-NULL, valid VEH handles, or NULL.
|
2016-04-06 04:45:04 +08:00
|
|
|
static sys::ThreadLocal<const void> sCurrentExceptionHandle;
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
static void installExceptionOrSignalHandlers() {
|
2011-08-20 14:35:36 +08:00
|
|
|
// We can set up vectored exception handling now. We will install our
|
|
|
|
// handler as the front of the list, though there's no assurances that
|
|
|
|
// it will remain at the front (another call could install itself before
|
|
|
|
// our handler). This 1) isn't likely, and 2) shouldn't cause problems.
|
|
|
|
PVOID handle = ::AddVectoredExceptionHandler(1, ExceptionHandler);
|
|
|
|
sCurrentExceptionHandle.set(handle);
|
2010-07-28 23:40:20 +08:00
|
|
|
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
static void uninstallExceptionOrSignalHandlers() {
|
2011-08-20 14:35:36 +08:00
|
|
|
PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
|
|
|
|
if (currentHandle) {
|
|
|
|
// Now we can remove the vectored exception handler from the chain
|
|
|
|
::RemoveVectoredExceptionHandler(currentHandle);
|
|
|
|
|
|
|
|
// Reset the handle in our thread-local set.
|
|
|
|
sCurrentExceptionHandle.set(NULL);
|
|
|
|
}
|
2010-07-28 23:40:20 +08:00
|
|
|
}
|
|
|
|
|
2018-04-29 08:45:03 +08:00
|
|
|
#else // !_WIN32
|
2010-07-29 09:21:47 +08:00
|
|
|
|
|
|
|
// Generic POSIX implementation.
|
|
|
|
//
|
|
|
|
// This implementation relies on synchronous signals being delivered to the
|
|
|
|
// current thread. We use a thread local object to keep track of the active
|
|
|
|
// crash recovery context, and install signal handlers to invoke HandleCrash on
|
|
|
|
// the active object.
|
|
|
|
//
|
2019-10-28 19:07:00 +08:00
|
|
|
// This implementation does not attempt to chain signal handlers in any
|
2010-07-29 09:21:47 +08:00
|
|
|
// reliable fashion -- if we get a signal outside of a crash recovery context we
|
|
|
|
// simply disable crash recovery and raise the signal again.
|
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
#include <signal.h>
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
static const int Signals[] =
|
|
|
|
{ SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
|
|
|
|
static const unsigned NumSignals = array_lengthof(Signals);
|
|
|
|
static struct sigaction PrevActions[NumSignals];
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
static void CrashRecoverySignalHandler(int Signal) {
|
2010-07-29 09:21:47 +08:00
|
|
|
// Lookup the current thread local recovery object.
|
2013-09-13 01:46:57 +08:00
|
|
|
const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
|
2010-07-29 09:21:47 +08:00
|
|
|
|
|
|
|
if (!CRCI) {
|
|
|
|
// We didn't find a crash recovery context -- this means either we got a
|
|
|
|
// signal on a thread we didn't expect it on, the application got a signal
|
|
|
|
// outside of a crash recovery context, or something else went horribly
|
|
|
|
// wrong.
|
|
|
|
//
|
|
|
|
// Disable crash recovery and raise the signal again. The assumption here is
|
|
|
|
// that the enclosing application will terminate soon, and we won't want to
|
|
|
|
// attempt crash recovery again.
|
|
|
|
//
|
|
|
|
// This call of Disable isn't thread safe, but it doesn't actually matter.
|
|
|
|
CrashRecoveryContext::Disable();
|
|
|
|
raise(Signal);
|
2010-10-19 05:55:18 +08:00
|
|
|
|
|
|
|
// The signal will be thrown once the signal mask is restored.
|
|
|
|
return;
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Unblock the signal we received.
|
|
|
|
sigset_t SigMask;
|
|
|
|
sigemptyset(&SigMask);
|
|
|
|
sigaddset(&SigMask, Signal);
|
2014-04-07 12:17:22 +08:00
|
|
|
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2020-09-24 20:14:45 +08:00
|
|
|
// Return the same error code as if the program crashed, as mentioned in the
|
|
|
|
// section "Exit Status for Commands":
|
|
|
|
// https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
|
|
|
|
int RetCode = 128 + Signal;
|
2020-01-12 04:27:07 +08:00
|
|
|
|
|
|
|
// Don't consider a broken pipe as a crash (see clang/lib/Driver/Driver.cpp)
|
|
|
|
if (Signal == SIGPIPE)
|
|
|
|
RetCode = EX_IOERR;
|
|
|
|
|
2010-07-29 09:21:47 +08:00
|
|
|
if (CRCI)
|
2020-01-12 04:27:07 +08:00
|
|
|
const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(RetCode, Signal);
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
static void installExceptionOrSignalHandlers() {
|
2010-07-29 09:21:47 +08:00
|
|
|
// Setup the signal handler.
|
|
|
|
struct sigaction Handler;
|
|
|
|
Handler.sa_handler = CrashRecoverySignalHandler;
|
|
|
|
Handler.sa_flags = 0;
|
|
|
|
sigemptyset(&Handler.sa_mask);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i != NumSignals; ++i) {
|
2010-07-31 01:49:04 +08:00
|
|
|
sigaction(Signals[i], &Handler, &PrevActions[i]);
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
static void uninstallExceptionOrSignalHandlers() {
|
2010-07-29 09:21:47 +08:00
|
|
|
// Restore the previous signal handlers.
|
|
|
|
for (unsigned i = 0; i != NumSignals; ++i)
|
2014-04-07 12:17:22 +08:00
|
|
|
sigaction(Signals[i], &PrevActions[i], nullptr);
|
2010-07-29 09:21:47 +08:00
|
|
|
}
|
|
|
|
|
2018-04-29 08:45:03 +08:00
|
|
|
#endif // !_WIN32
|
2010-07-29 09:21:47 +08:00
|
|
|
|
2014-05-06 09:44:26 +08:00
|
|
|
/// setjmp-based flavour of RunSafely: runs \p Fn and returns true when it
/// completes. Returns false when HandleCrash jumped back here via longjmp.
/// With crash recovery disabled, \p Fn is simply called.
bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
  // Only set up a recovery point when crash recovery is enabled.
  if (gCrashRecoveryEnabled) {
    assert(!Impl && "Crash recovery context already initialized!");
    CrashRecoveryContextImpl *Recovery = new CrashRecoveryContextImpl(this);
    Impl = Recovery;

    // Mark the buffer valid so HandleCrash knows it may longjmp back here.
    Recovery->ValidJumpBuffer = true;

    // A non-zero value means we arrived here through longjmp, i.e. Fn failed.
    if (setjmp(Recovery->JumpBuffer) != 0)
      return false;
  }

  Fn();
  return true;
}
|
|
|
|
|
2017-05-18 02:16:17 +08:00
|
|
|
#endif // !_MSC_VER
|
|
|
|
|
2021-07-29 00:31:14 +08:00
|
|
|
[[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) {
|
2020-02-11 23:17:15 +08:00
|
|
|
#if defined(_WIN32)
|
|
|
|
// SEH and VEH
|
|
|
|
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
|
|
|
|
#else
|
|
|
|
// On Unix we don't need to raise an exception, we go directly to
|
|
|
|
// HandleCrash(), then longjmp will unwind the stack for us.
|
|
|
|
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl;
|
|
|
|
assert(CRCI && "Crash recovery context never initialized!");
|
|
|
|
CRCI->HandleCrash(RetCode, 0 /*no sig num*/);
|
|
|
|
#endif
|
|
|
|
llvm_unreachable("Most likely setjmp wasn't called!");
|
|
|
|
}
|
|
|
|
|
[LLD][COFF] When using LLD-as-a-library, always prevent re-entrance on failures
This is a follow-up for D70378 (Cover usage of LLD as a library).
While debugging an intermittent failure on a bot, I recalled this scenario which
causes the issue:
1. When executing lld/test/ELF/invalid/symtab-sh-info.s L45, we reach
lld::elf::ObjFile::ObjFile() which goes straight into its base ELFFileBase(),
then ELFFileBase::init().
2.At that point fatal() is thrown in lld/ELF/InputFiles.cpp L381, leaving a
half-initialized ObjFile instance.
3. We then end up in lld::exitLld() and since we are running with LLD_IN_TEST, we
happily restore the control flow to CrashRecoveryContext::RunSafely() then back
in lld::safeLldMain().
4.Before this patch, we called errorHandler().reset() just after, and this
attempted to reset the associated SpecificAlloc<ObjFile<ELF64LE>>. That tried
to free the half-initialized ObjFile instance, and more precisely its
ObjFile::dwarf member.
Sometimes that worked, sometimes it failed and was caught by the
CrashRecoveryContext. This scenario was the reason we called
errorHandler().reset() through a CrashRecoveryContext.
But in some rare cases, the above repro somehow corrupted the heap, creating a
stack overflow. When the CrashRecoveryContext's filter (that is,
__except (ExceptionFilter(GetExceptionInformation()))) tried to handle the
exception, it crashed again since the stack was exhausted -- and that took the
whole application down. That is the issue seen on the bot. Locally it happens
about 1 time out of 15.
Now this situation can happen anywhere in LLD. Since catching stack overflows is
not a reliable scenario ATM when using CrashRecoveryContext, we're now
preventing further re-entrance when such failures occur, by signaling
lld::SafeReturn::canRunAgain=false. When running with LLD_IN_TEST=2 (or above),
only one iteration will be executed, instead of two.
Differential Revision: https://reviews.llvm.org/D88348
2020-11-12 21:14:20 +08:00
|
|
|
/// Re-raises \p RetCode as an exception (Windows) or signal (Unix) when it
/// denotes a crash. Returns false, without raising anything, when it does
/// not; otherwise returns true if control comes back after the raise.
bool CrashRecoveryContext::throwIfCrash(int RetCode) {
#if defined(_WIN32)
  // On Windows, the high bits are reserved for kernel return codes. Values
  // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000
  // and up are for "errors". In practice, both are interpreted as a
  // non-continuable signal.
  const unsigned TopNibble =
      (static_cast<unsigned>(RetCode) & 0xF0000000) >> 28;
  const bool IsKernelCode = TopNibble == 0xC || TopNibble == 8;
  if (!IsKernelCode)
    return false;
  ::RaiseException(RetCode, 0, 0, NULL);
#else
  // On Unix, signals are represented by return codes of 128 or higher.
  // Exit code 128 is a reserved value and should not be raised as a signal.
  const bool IsSignalCode = RetCode > 128;
  if (!IsSignalCode)
    return false;
  llvm::sys::unregisterHandlers();
  raise(RetCode - 128);
#endif
  return true;
}
|
|
|
|
|
2014-06-26 07:54:50 +08:00
|
|
|
// FIXME: Portability.
// Requests PRIO_DARWIN_BG for the calling thread on Apple platforms; compiles
// to an empty function everywhere else.
static void setThreadBackgroundPriority() {
#if defined(__APPLE__)
  setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
#endif
}
|
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
// Returns true when getpriority(PRIO_DARWIN_THREAD, 0) reports 1 for the
// calling thread on Apple platforms; always false elsewhere.
static bool hasThreadBackgroundPriority() {
#if defined(__APPLE__)
  const int ThreadPriority = getpriority(PRIO_DARWIN_THREAD, 0);
  return ThreadPriority == 1;
#else
  return false;
#endif
}
|
2010-11-05 15:19:09 +08:00
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
namespace {
|
2010-11-05 15:19:09 +08:00
|
|
|
struct RunSafelyOnThreadInfo {
|
2014-05-06 09:44:26 +08:00
|
|
|
function_ref<void()> Fn;
|
2010-11-05 15:19:09 +08:00
|
|
|
CrashRecoveryContext *CRC;
|
2014-06-26 07:54:50 +08:00
|
|
|
bool UseBackgroundPriority;
|
2010-11-05 15:19:09 +08:00
|
|
|
bool Result;
|
|
|
|
};
|
2021-01-28 02:37:19 +08:00
|
|
|
} // namespace
|
2010-11-05 15:19:09 +08:00
|
|
|
|
2016-04-06 04:45:04 +08:00
|
|
|
static void RunSafelyOnThread_Dispatch(void *UserData) {
|
2010-11-05 15:19:09 +08:00
|
|
|
RunSafelyOnThreadInfo *Info =
|
|
|
|
reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
|
2014-06-26 07:54:50 +08:00
|
|
|
|
|
|
|
if (Info->UseBackgroundPriority)
|
|
|
|
setThreadBackgroundPriority();
|
|
|
|
|
2014-05-06 09:44:26 +08:00
|
|
|
Info->Result = Info->CRC->RunSafely(Info->Fn);
|
2010-11-05 15:19:09 +08:00
|
|
|
}
|
2014-05-06 09:44:26 +08:00
|
|
|
bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
|
2010-11-05 15:19:09 +08:00
|
|
|
unsigned RequestedStackSize) {
|
2014-06-26 07:54:50 +08:00
|
|
|
bool UseBackgroundPriority = hasThreadBackgroundPriority();
|
|
|
|
RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false };
|
2021-05-26 18:25:11 +08:00
|
|
|
llvm::thread Thread(RequestedStackSize == 0
|
|
|
|
? llvm::None
|
|
|
|
: llvm::Optional<unsigned>(RequestedStackSize),
|
|
|
|
RunSafelyOnThread_Dispatch, &Info);
|
|
|
|
Thread.join();
|
|
|
|
|
2013-06-20 06:53:45 +08:00
|
|
|
if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
|
|
|
|
CRC->setSwitchedThread();
|
2010-11-05 15:19:09 +08:00
|
|
|
return Info.Result;
|
|
|
|
}
|