forked from OSchip/llvm-project
[msan] intercept dlopen and clear shadow for it
Summary: The loader does not call mmap() through the PLT because it has to bootstrap the process before libc is present. Hooking dlopen() isn't enough either because the loader runs module initializers before returning, and they could run arbitrary msan instrumented code. If msandr is present, then we can intercept the mmaps from dlopen at the syscall layer and clear the shadow there. If msandr is missing, we clear the shadow after dlopen() and hope any initializers are trivial. Reviewers: eugenis CC: kcc, llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D509 llvm-svn: 176818
This commit is contained in:
parent
06d274fdb7
commit
c9d382b5a4
|
@ -59,6 +59,7 @@ static THREADLOCAL struct {
|
|||
} __msan_stack_bounds;
|
||||
|
||||
static THREADLOCAL bool is_in_symbolizer;
|
||||
static THREADLOCAL bool is_in_loader;
|
||||
|
||||
extern "C" const int __msan_track_origins;
|
||||
int __msan_get_track_origins() {
|
||||
|
@ -87,6 +88,14 @@ void EnterSymbolizer() { is_in_symbolizer = true; }
|
|||
// Clears the per-thread "inside the symbolizer" flag.
void ExitSymbolizer() {
  is_in_symbolizer = false;
}
|
||||
// Reports whether the current thread has the "inside the symbolizer" flag set.
bool IsInSymbolizer() {
  return is_in_symbolizer;
}
|
||||
|
||||
// Sets the per-thread "inside the dynamic loader" flag.
void EnterLoader() {
  is_in_loader = true;
}
|
||||
// Clears the per-thread "inside the dynamic loader" flag.
void ExitLoader() {
  is_in_loader = false;
}
|
||||
|
||||
extern "C" {
|
||||
// Exported accessor for the per-thread loader flag toggled by
// EnterLoader()/ExitLoader().
SANITIZER_INTERFACE_ATTRIBUTE
bool __msan_is_in_loader() {
  return is_in_loader;
}
|
||||
}
|
||||
|
||||
static Flags msan_flags;
|
||||
|
||||
Flags *flags() {
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
// True when `mem` lies in [0x200000000000, 0x400000000000] (both bounds
// inclusive). The argument is parenthesized so that compound expressions
// (e.g. `p + n` or `c ? a : b`) are converted to uptr as a whole rather than
// having the cast bind to their first operand only.
// Note: `mem` is evaluated twice, so it must not have side effects.
#define MEM_IS_SHADOW(mem) ((uptr)(mem) >= 0x200000000000ULL && \
                            (uptr)(mem) <= 0x400000000000ULL)
|
||||
|
||||
struct link_map; // Opaque type returned by dlopen().
|
||||
|
||||
const int kMsanParamTlsSizeInWords = 100;
|
||||
const int kMsanRetvalTlsSizeInWords = 100;
|
||||
|
||||
|
@ -55,6 +57,9 @@ struct SymbolizerScope {
|
|||
~SymbolizerScope() { ExitSymbolizer(); }
|
||||
};
|
||||
|
||||
void EnterLoader();
|
||||
void ExitLoader();
|
||||
|
||||
void MsanDie();
|
||||
void PrintWarning(uptr pc, uptr bp);
|
||||
void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
|
||||
|
@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 origin);
|
|||
void ReportExpectedUMRNotFound(StackTrace *stack);
|
||||
void ReportAtExitStatistics();
|
||||
|
||||
void UnpoisonMappedDSO(struct link_map *map);
|
||||
|
||||
#define GET_MALLOC_STACK_TRACE \
|
||||
StackTrace stack; \
|
||||
stack.size = 0; \
|
||||
|
|
|
@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
|
|||
return res;
|
||||
}
|
||||
|
||||
// dlopen() ultimately calls mmap() down inside the loader, which generally
|
||||
// doesn't participate in dynamic symbol resolution. Therefore we won't
|
||||
// intercept its calls to mmap, and we have to hook it here. The loader
|
||||
// initializes the module before returning, so without the dynamic component, we
|
||||
// won't be able to clear the shadow before the initializers. Fixing this would
|
||||
// require putting our own initializer first to clear the shadow.
|
||||
INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
|
||||
ENSURE_MSAN_INITED();
|
||||
EnterLoader();
|
||||
link_map *map = (link_map *)REAL(dlopen)(filename, flag);
|
||||
ExitLoader();
|
||||
if (!__msan_has_dynamic_component()) {
|
||||
// If msandr didn't clear the shadow before the initializers ran, we do it
|
||||
// ourselves afterwards.
|
||||
UnpoisonMappedDSO(map);
|
||||
}
|
||||
return (void *)map;
|
||||
}
|
||||
|
||||
INTERCEPTOR(int, getrusage, int who, void *usage) {
|
||||
ENSURE_MSAN_INITED();
|
||||
int res = REAL(getrusage)(who, usage);
|
||||
|
@ -973,6 +992,7 @@ void InitializeInterceptors() {
|
|||
INTERCEPT_FUNCTION(recvfrom);
|
||||
INTERCEPT_FUNCTION(recvmsg);
|
||||
INTERCEPT_FUNCTION(dladdr);
|
||||
INTERCEPT_FUNCTION(dlopen);
|
||||
INTERCEPT_FUNCTION(getrusage);
|
||||
inited = 1;
|
||||
}
|
||||
|
|
|
@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();
|
|||
SANITIZER_INTERFACE_ATTRIBUTE
|
||||
int __msan_get_param_tls_offset();
|
||||
|
||||
// For intercepting mmap from ld.so in msandr.
|
||||
SANITIZER_INTERFACE_ATTRIBUTE
|
||||
bool __msan_is_in_loader();
|
||||
|
||||
// For testing.
|
||||
SANITIZER_INTERFACE_ATTRIBUTE
|
||||
u32 __msan_get_umr_origin();
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
|
||||
#include "msan.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <elf.h>
|
||||
#include <link.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
|
@ -87,6 +90,42 @@ static void MsanAtExit(void) {
|
|||
void InstallAtExitHandler() {
|
||||
atexit(MsanAtExit);
|
||||
}
|
||||
|
||||
void UnpoisonMappedDSO(link_map *map) {
|
||||
typedef ElfW(Phdr) Elf_Phdr;
|
||||
typedef ElfW(Ehdr) Elf_Ehdr;
|
||||
char *base = (char *)map->l_addr;
|
||||
Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
|
||||
char *phdrs = base + ehdr->e_phoff;
|
||||
char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
|
||||
|
||||
// Find the segment with the minimum base so we can "relocate" the p_vaddr
|
||||
// fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
|
||||
// objects have a non-zero base.
|
||||
uptr preferred_base = ~0ULL;
|
||||
for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
|
||||
Elf_Phdr *phdr = (Elf_Phdr *)iter;
|
||||
if (phdr->p_type == PT_LOAD)
|
||||
preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr);
|
||||
}
|
||||
|
||||
// Compute the delta from the real base to get a relocation delta.
|
||||
ptrdiff_t delta = (uptr)base - preferred_base;
|
||||
// Now we can figure out what the loader really mapped.
|
||||
for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
|
||||
Elf_Phdr *phdr = (Elf_Phdr *)iter;
|
||||
if (phdr->p_type == PT_LOAD) {
|
||||
uptr seg_start = phdr->p_vaddr + delta;
|
||||
uptr seg_end = seg_start + phdr->p_memsz;
|
||||
// None of these values are aligned. We consider the ragged edges of the
|
||||
// load command as defined, since they are mapped from the file.
|
||||
seg_start = RoundDownTo(seg_start, GetPageSizeCached());
|
||||
seg_end = RoundUpTo(seg_end, GetPageSizeCached());
|
||||
__msan_unpoison((void *)seg_start, seg_end - seg_start);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __msan
|
||||
|
||||
#endif // __linux__
|
||||
|
|
|
@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS
|
|||
|
||||
# Unittest sources and build flags.
|
||||
set(MSAN_UNITTEST_SOURCE msan_test.cc)
|
||||
set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
|
||||
set(MSAN_UNITTEST_HEADERS
|
||||
msandr_test_so.h
|
||||
../../../include/sanitizer/msan_interface.h
|
||||
|
@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS
|
|||
# FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
|
||||
-lstdc++
|
||||
)
|
||||
set(MSAN_LOADABLE_LINK_FLAGS
|
||||
-fsanitize=memory
|
||||
-shared
|
||||
)
|
||||
|
||||
# Compile source for the given architecture, using compiler
|
||||
# options in ${ARGN}, and add it to the object list.
|
||||
|
@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name arch)
|
|||
add_compiler_rt_test(${test_suite} ${test_name}
|
||||
OBJECTS ${ARGN}
|
||||
DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
|
||||
${MSAN_LOADABLE_SO}
|
||||
LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
|
||||
${TARGET_LINK_FLAGS}
|
||||
"-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)
|
|||
msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
|
||||
${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
|
||||
|
||||
# Instrumented loadable module objects.
|
||||
set(MSAN_INST_LOADABLE_OBJECTS)
|
||||
msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
|
||||
${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
|
||||
|
||||
# Uninstrumented shared object for MSanDR tests.
|
||||
set(MSANDR_TEST_OBJECTS)
|
||||
msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
|
||||
${MSAN_UNITTEST_COMMON_CFLAGS})
|
||||
|
||||
# Instrumented loadable library tests.
|
||||
set(MSAN_LOADABLE_SO)
|
||||
msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
|
||||
OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
|
||||
DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
|
||||
|
||||
# Uninstrumented shared library tests.
|
||||
set(MSANDR_TEST_SO)
|
||||
msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
//===-- msan_loadable.cc --------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of MemorySanitizer.
|
||||
//
|
||||
// MemorySanitizer unit tests.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "msan/msan_interface_internal.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static void *dso_global;
|
||||
|
||||
// No name mangling.
|
||||
extern "C" {
|
||||
|
||||
__attribute__((constructor))
|
||||
void loadable_module_init(void) {
|
||||
if (!__msan_has_dynamic_component())
|
||||
return;
|
||||
// The real test is that this compare should not make an uninit.
|
||||
if (dso_global == NULL)
|
||||
dso_global = malloc(4);
|
||||
}
|
||||
|
||||
__attribute__((destructor))
|
||||
void loadable_module_fini(void) {
|
||||
if (!__msan_has_dynamic_component())
|
||||
return;
|
||||
free(dso_global);
|
||||
// *Don't* overwrite it with NULL! That would unpoison it, but our test
|
||||
// relies on reloading at the same address and keeping the poison.
|
||||
}
|
||||
|
||||
void **get_dso_global() {
|
||||
return &dso_global;
|
||||
}
|
||||
|
||||
}
|
|
@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {
|
|||
EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
|
||||
}
|
||||
|
||||
#ifdef __GLIBC__
|
||||
extern "C" {
|
||||
extern void *__libc_stack_end;
|
||||
}
|
||||
|
||||
// Returns the address one machine word past __libc_stack_end, interpreted as
// the argv array.
// NOTE(review): presumably relies on __libc_stack_end pointing at argc in the
// initial process stack -- confirm against the glibc version in use.
static char **GetArgv(void) {
  return (char **)((uintptr_t *)__libc_stack_end + 1);
}
|
||||
|
||||
#else // __GLIBC__
|
||||
# error "TODO: port this"
|
||||
#endif
|
||||
|
||||
// Loads the instrumented test DSO twice and checks that its global is not
// poisoned right after each dlopen(), even though the previous iteration
// poisoned it before unloading.
TEST(MemorySanitizer, dlopen) {
  // Compute the path to our loadable DSO.  We assume it's in the same
  // directory.  Only use string routines that we intercept so far to do this.
  char *exe_path = GetArgv()[0];
  const char *so_name = "libmsan_loadable.x86_64.so";
  // Room for the executable path, a '/', the DSO name and the terminator.
  const size_t so_path_len = strlen(exe_path) + 1 + strlen(so_name) + 1;
  char *so_path = new char[so_path_len];
  char *last_slash = strrchr(exe_path, '/');
  assert(last_slash);
  const int dir_len = int(last_slash - exe_path);
  snprintf(so_path, so_path_len, "%.*s/%s", dir_len, exe_path, so_name);

  // We need to clear shadow for globals when doing dlopen.  In order to test
  // this, we have to poison the shadow for the DSO before we load it.  In
  // general this is difficult, but the loader tends to reload things in the
  // same place, so we open, close, and then reopen.  The global should always
  // start out clean after dlopen.
  typedef void **(*GetDsoGlobalFn)();
  for (int round = 0; round != 2; ++round) {
    void *lib = dlopen(so_path, RTLD_LAZY);
    if (lib == NULL) {
      printf("dlerror: %s\n", dlerror());
      assert(lib != NULL);
    }
    GetDsoGlobalFn get_dso_global =
        (GetDsoGlobalFn)dlsym(lib, "get_dso_global");
    assert(get_dso_global);
    void **global_slot = get_dso_global();
    EXPECT_NOT_POISONED(*global_slot);
    // Poison the slot before unloading so the next iteration can tell
    // whether dlopen() cleared the shadow again.
    __msan_poison(global_slot, sizeof(*global_slot));
    EXPECT_POISONED(*global_slot);
    dlclose(lib);
  }

  delete[] so_path;
}
|
||||
|
||||
TEST(MemorySanitizer, scanf) {
|
||||
const char *input = "42 hello";
|
||||
int* d = new int;
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <drsyscall.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h> /* for SYS_mmap */
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data_t *info)
|
|||
|
||||
int(*__msan_get_retval_tls_offset)();
|
||||
int(*__msan_get_param_tls_offset)();
|
||||
void (*__msan_unpoison)(void *base, size_t size);
|
||||
bool (*__msan_is_in_loader)();
|
||||
|
||||
// Resolves the exported symbol `name` in module `app` via
// dr_get_proc_address().  If the symbol is missing, prints a diagnostic naming
// the symbol and the module path and then runs CHECK on the null result.
static generic_func_t LookupCallback(module_data_t *app, const char *name) {
  generic_func_t fn = dr_get_proc_address(app->handle, name);
  if (fn != NULL)
    return fn;

  dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
  CHECK(fn);
  return fn;
}
|
||||
|
||||
void InitializeMSanCallbacks() {
|
||||
module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
|
||||
|
@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {
|
|||
}
|
||||
g_app_path = app->full_path;
|
||||
|
||||
const char *callback_name = "__msan_get_retval_tls_offset";
|
||||
__msan_get_retval_tls_offset =
|
||||
(int(*)()) dr_get_proc_address(app->handle, callback_name);
|
||||
if (__msan_get_retval_tls_offset == NULL) {
|
||||
dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
|
||||
CHECK(__msan_get_retval_tls_offset);
|
||||
}
|
||||
__msan_get_retval_tls_offset = (int (*)())
|
||||
LookupCallback(app, "__msan_get_retval_tls_offset");
|
||||
__msan_get_param_tls_offset = (int (*)())
|
||||
LookupCallback(app, "__msan_get_param_tls_offset");
|
||||
__msan_unpoison = (void(*)(void *, size_t))
|
||||
LookupCallback(app, "__msan_unpoison");
|
||||
__msan_is_in_loader = (bool (*)())
|
||||
LookupCallback(app, "__msan_is_in_loader");
|
||||
|
||||
callback_name = "__msan_get_param_tls_offset";
|
||||
__msan_get_param_tls_offset =
|
||||
(int(*)()) dr_get_proc_address(app->handle, callback_name);
|
||||
if (__msan_get_param_tls_offset == NULL) {
|
||||
dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
|
||||
CHECK(__msan_get_param_tls_offset);
|
||||
}
|
||||
dr_free_module_data(app);
|
||||
}
|
||||
|
||||
#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
|
||||
|
||||
// FIXME: Handle absolute addresses and PC-relative addresses.
|
||||
// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
|
||||
// a zero base anyway.
|
||||
|
@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
|
|||
|
||||
if (arg->pre)
|
||||
return true;
|
||||
if (arg->mode != DRSYS_PARAM_OUT)
|
||||
if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
|
||||
return true;
|
||||
|
||||
size_t sz = arg->size;
|
||||
|
@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
|
|||
(unsigned long long)(sz & 0xFFFFFFFF));
|
||||
}
|
||||
|
||||
void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
|
||||
memset(p, 0, sz);
|
||||
if (VERBOSITY > 0) {
|
||||
drmf_status_t res;
|
||||
drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
|
||||
const char *name;
|
||||
res = drsys_syscall_name(syscall, &name);
|
||||
dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
|
||||
name, arg->ordinal, arg->start_addr,
|
||||
(char *)arg->start_addr + sz);
|
||||
}
|
||||
|
||||
// We don't switch to the app context because __msan_unpoison() doesn't need
|
||||
// TLS segments.
|
||||
__msan_unpoison(arg->start_addr, sz);
|
||||
|
||||
return true; /* keep going */
|
||||
}
|
||||
|
@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext, int sysnum) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool IsInLoader(void *drcontext) {
|
||||
// TODO: This segment swap is inefficient. DR should just let us query the
|
||||
// app segment base, which it has. Alternatively, if we disable
|
||||
// -mangle_app_seg, then we won't need the swap.
|
||||
bool need_swap = !dr_using_app_state(drcontext);
|
||||
if (need_swap)
|
||||
dr_switch_to_app_state(drcontext);
|
||||
bool is_in_loader = __msan_is_in_loader();
|
||||
if (need_swap)
|
||||
dr_switch_to_dr_state(drcontext);
|
||||
return is_in_loader;
|
||||
}
|
||||
|
||||
void event_post_syscall(void *drcontext, int sysnum) {
|
||||
drsys_syscall_t *syscall;
|
||||
drsys_sysnum_t sysnum_full;
|
||||
|
@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext, int sysnum) {
|
|||
drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
|
||||
CHECK(res == DRMF_SUCCESS);
|
||||
}
|
||||
|
||||
// Our normal mmap interceptor can't intercept calls from the loader itself.
|
||||
// This means we don't clear the shadow for calls to dlopen. For now, we
|
||||
// solve this by intercepting mmap from ld.so here, but ideally we'd have a
|
||||
// solution that doesn't rely on msandr.
|
||||
//
|
||||
// Be careful not to intercept maps done by the msan rtl. Otherwise we end up
|
||||
// unpoisoning vast regions of memory and OOMing.
|
||||
// TODO: __msan_unpoison() could "flush" large regions of memory like tsan
|
||||
// does instead of doing a large memset. However, we need the memory to be
|
||||
// zeroed, where as tsan does not, so plain madvise is not enough.
|
||||
if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
|
||||
if (IsInLoader(drcontext)) {
|
||||
app_pc base = (app_pc)dr_syscall_get_result(drcontext);
|
||||
ptr_uint_t size;
|
||||
drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
|
||||
CHECK(res == DRMF_SUCCESS);
|
||||
if (VERBOSITY > 0)
|
||||
dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
|
||||
// We don't switch to the app context because __msan_unpoison() doesn't
|
||||
// need TLS segments.
|
||||
__msan_unpoison(base, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in New Issue