diff --git a/compiler-rt/lib/msan/msan.cc b/compiler-rt/lib/msan/msan.cc
index cfa0741828e5..96f99d4a8e4a 100644
--- a/compiler-rt/lib/msan/msan.cc
+++ b/compiler-rt/lib/msan/msan.cc
@@ -59,6 +59,7 @@ static THREADLOCAL struct {
 } __msan_stack_bounds;
 
 static THREADLOCAL bool is_in_symbolizer;
+static THREADLOCAL bool is_in_loader;  // Toggled by EnterLoader/ExitLoader.
 
 extern "C" const int __msan_track_origins;
 int __msan_get_track_origins() {
@@ -87,6 +88,18 @@ void EnterSymbolizer() { is_in_symbolizer = true; }
 void ExitSymbolizer() { is_in_symbolizer = false; }
 bool IsInSymbolizer() { return is_in_symbolizer; }
 
+// Set / clear the thread-local flag that marks the calling thread as running
+// inside the dynamic loader.
+void EnterLoader() { is_in_loader = true; }
+void ExitLoader() { is_in_loader = false; }
+
+extern "C" {
+// Returns the calling thread's loader flag, i.e. true between EnterLoader()
+// and ExitLoader().
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader() { return is_in_loader; }
+}
+
 static Flags msan_flags;
 
 Flags *flags() {
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index fae1ad58a6d6..123dd36bf352 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -26,6 +26,8 @@
 #define MEM_IS_SHADOW(mem) ((uptr)mem >= 0x200000000000ULL && \
                             (uptr)mem <= 0x400000000000ULL)
 
+struct link_map;  // Opaque type returned by dlopen().
+
 const int kMsanParamTlsSizeInWords = 100;
 const int kMsanRetvalTlsSizeInWords = 100;
 
@@ -55,6 +57,9 @@ struct SymbolizerScope {
   ~SymbolizerScope() { ExitSymbolizer(); }
 };
 
+void EnterLoader();
+void ExitLoader();
+
 void MsanDie();
 void PrintWarning(uptr pc, uptr bp);
 void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
@@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 origin);
 void ReportExpectedUMRNotFound(StackTrace *stack);
 void ReportAtExitStatistics();
 
+void UnpoisonMappedDSO(struct link_map *map);
+
 #define GET_MALLOC_STACK_TRACE \
   StackTrace stack; \
   stack.size = 0; \
diff --git a/compiler-rt/lib/msan/msan_interceptors.cc b/compiler-rt/lib/msan/msan_interceptors.cc
index a6f25e8aa044..f81c8de89ef7 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cc
+++ b/compiler-rt/lib/msan/msan_interceptors.cc
@@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
   return res;
 }
 
+// dlopen() ultimately calls mmap() down inside the loader, which generally
+// doesn't participate in dynamic symbol resolution. Therefore we won't
+// intercept its calls to mmap, and we have to hook it here. The loader
+// initializes the module before returning, so without the dynamic component, we
+// won't be able to clear the shadow before the initializers. Fixing this would
+// require putting our own initializer first to clear the shadow.
+INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
+  ENSURE_MSAN_INITED();
+  EnterLoader();
+  link_map *map = (link_map *)REAL(dlopen)(filename, flag);
+  ExitLoader();
+  if (map && !__msan_has_dynamic_component()) {
+    // Without msandr nothing cleared the shadow before the initializers ran,
+    // so do it now. A failed dlopen() returns NULL and has nothing to clear.
+    UnpoisonMappedDSO(map);
+  }
+  return (void *)map;
+}
+
 INTERCEPTOR(int, getrusage, int who, void *usage) {
   ENSURE_MSAN_INITED();
   int res = REAL(getrusage)(who, usage);
@@ -973,6 +992,7 @@ void InitializeInterceptors() {
   INTERCEPT_FUNCTION(recvfrom);
   INTERCEPT_FUNCTION(recvmsg);
   INTERCEPT_FUNCTION(dladdr);
+  INTERCEPT_FUNCTION(dlopen);
   INTERCEPT_FUNCTION(getrusage);
   inited = 1;
 }
diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h
index 905c5b793987..e1cd13c3f736 100644
--- a/compiler-rt/lib/msan/msan_interface_internal.h
+++ b/compiler-rt/lib/msan/msan_interface_internal.h
@@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();
 SANITIZER_INTERFACE_ATTRIBUTE
 int __msan_get_param_tls_offset();
 
+// For intercepting mmap from ld.so in msandr.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader();
+
 // For testing.
 SANITIZER_INTERFACE_ATTRIBUTE
 u32 __msan_get_umr_origin();
diff --git a/compiler-rt/lib/msan/msan_linux.cc b/compiler-rt/lib/msan/msan_linux.cc
index 0b08b7defe9c..64aa35b0b354 100644
--- a/compiler-rt/lib/msan/msan_linux.cc
+++ b/compiler-rt/lib/msan/msan_linux.cc
@@ -16,6 +16,9 @@
 
 #include "msan.h"
 
+#include
+#include
+#include
 #include
 #include
 #include
@@ -87,6 +90,56 @@ static void MsanAtExit(void) {
 void InstallAtExitHandler() {
   atexit(MsanAtExit);
 }
+
+// dl_iterate_phdr() callback for UnpoisonMappedDSO(). |arg| is the link_map of
+// the module to unpoison. Returns 1 (stop iterating) once that module has
+// been handled and 0 (keep iterating) for every other module.
+static int UnpoisonSegmentsOfDSO(struct dl_phdr_info *info, size_t /*size*/,
+                                 void *arg) {
+  typedef ElfW(Phdr) Elf_Phdr;
+  link_map *map = (link_map *)arg;
+  const Elf_Phdr *phdrs = info->dlpi_phdr;
+  // The load bias: added to a p_vaddr it yields the run-time address.
+  uptr bias = (uptr)info->dlpi_addr;
+
+  // Load biases are not unique (objects loaded at their link-time addresses
+  // all have a bias of zero), so match on the dynamic section address instead.
+  bool is_target = false;
+  for (uptr i = 0; i < info->dlpi_phnum && !is_target; i++) {
+    is_target = phdrs[i].p_type == PT_DYNAMIC &&
+                bias + phdrs[i].p_vaddr == (uptr)map->l_ld;
+  }
+  if (!is_target)
+    return 0;
+
+  uptr page_size = GetPageSizeCached();
+  for (uptr i = 0; i < info->dlpi_phnum; i++) {
+    if (phdrs[i].p_type != PT_LOAD)
+      continue;
+    uptr seg_start = bias + phdrs[i].p_vaddr;
+    uptr seg_end = seg_start + phdrs[i].p_memsz;
+    // None of these values are aligned. We consider the ragged edges of the
+    // load command as defined, since they are mapped from the file.
+    seg_start = RoundDownTo(seg_start, page_size);
+    seg_end = RoundUpTo(seg_end, page_size);
+    __msan_unpoison((void *)seg_start, seg_end - seg_start);
+  }
+  return 1;
+}
+
+// Clears the shadow of every PT_LOAD segment of the module described by |map|
+// (the value returned by dlopen()). Does nothing if |map| is NULL.
+//
+// map->l_addr is the load bias, not the address of the ELF header, so the
+// program headers are taken from dl_iterate_phdr() instead of reading an
+// Elf_Ehdr at l_addr; the two only coincide when the lowest segment has a
+// p_vaddr of zero.
+void UnpoisonMappedDSO(link_map *map) {
+  if (!map)
+    return;
+  dl_iterate_phdr(UnpoisonSegmentsOfDSO, map);
+}
+
 }  // namespace __msan
 
 #endif  // __linux__
diff --git a/compiler-rt/lib/msan/tests/CMakeLists.txt b/compiler-rt/lib/msan/tests/CMakeLists.txt
index 7067c45785cd..813aad02a252 100644
--- a/compiler-rt/lib/msan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/msan/tests/CMakeLists.txt
@@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS
 
 # Unittest sources and build flags.
 set(MSAN_UNITTEST_SOURCE msan_test.cc)
+set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
 set(MSAN_UNITTEST_HEADERS
   msandr_test_so.h
   ../../../include/sanitizer/msan_interface.h
@@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS
   # FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
   -lstdc++
 )
+set(MSAN_LOADABLE_LINK_FLAGS
+  -fsanitize=memory
+  -shared
+)
 
 # Compile source for the given architecture, using compiler
 # options in ${ARGN}, and add it to the object list.
@@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name arch)
   add_compiler_rt_test(${test_suite} ${test_name}
     OBJECTS ${ARGN}
     DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
+         ${MSAN_LOADABLE_SO}
     LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
                ${TARGET_LINK_FLAGS}
                "-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
@@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)
   msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
                ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
 
+  # Instrumented loadable module objects.
+  set(MSAN_INST_LOADABLE_OBJECTS)
+  msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
+               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
   # Uninstrumented shared object for MSanDR tests.
   set(MSANDR_TEST_OBJECTS)
   msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
                ${MSAN_UNITTEST_COMMON_CFLAGS})
 
+  # Instrumented loadable library tests.
+  set(MSAN_LOADABLE_SO)
+  msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
+                   OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
+                   DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
   # Uninstrumented shared library tests.
   set(MSANDR_TEST_SO)
   msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}
diff --git a/compiler-rt/lib/msan/tests/msan_loadable.cc b/compiler-rt/lib/msan/tests/msan_loadable.cc
new file mode 100644
index 000000000000..db3bf489853d
--- /dev/null
+++ b/compiler-rt/lib/msan/tests/msan_loadable.cc
@@ -0,0 +1,45 @@
+//===-- msan_loadable.cc --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msan/msan_interface_internal.h"
+#include
+
+static void *dso_global;
+
+// No name mangling.
+extern "C" {
+
+__attribute__((constructor))
+void loadable_module_init(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  // The real test is that this comparison must not report an uninit read.
+  if (dso_global == NULL)
+    dso_global = malloc(4);
+}
+
+__attribute__((destructor))
+void loadable_module_fini(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  free(dso_global);
+  // *Don't* overwrite it with NULL! That would unpoison it, but our test
+  // relies on reloading at the same address and keeping the poison.
+}
+
+void **get_dso_global() {
+  return &dso_global;
+}
+
+}
diff --git a/compiler-rt/lib/msan/tests/msan_test.cc b/compiler-rt/lib/msan/tests/msan_test.cc
index b30a8dffd791..c1040d5c2b9c 100644
--- a/compiler-rt/lib/msan/tests/msan_test.cc
+++ b/compiler-rt/lib/msan/tests/msan_test.cc
@@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {
   EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
 }
 
+#ifdef __GLIBC__
+extern "C" {
+  extern void *__libc_stack_end;
+}
+
+static char **GetArgv(void) {
+  uintptr_t *stack_end = (uintptr_t *)__libc_stack_end;
+  return (char**)(stack_end + 1);
+}
+
+#else  // __GLIBC__
+# error "TODO: port this"
+#endif
+
+TEST(MemorySanitizer, dlopen) {
+  // Compute the path to our loadable DSO. We assume it's in the same
+  // directory. Only use string routines that we intercept so far to do this.
+  char **argv = GetArgv();
+  const char *basename = "libmsan_loadable.x86_64.so";
+  size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1;
+  char *path = new char[path_max];
+  char *last_slash = strrchr(argv[0], '/');
+  assert(last_slash);
+  snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]),
+           argv[0], basename);
+
+  // We need to clear shadow for globals when doing dlopen. In order to test
+  // this, we have to poison the shadow for the DSO before we load it. In
+  // general this is difficult, but the loader tends to reload things in the
+  // same place, so we open, close, and then reopen. The global should always
+  // start out clean after dlopen.
+  for (int i = 0; i < 2; i++) {
+    void *lib = dlopen(path, RTLD_LAZY);
+    if (lib == NULL) {
+      printf("dlerror: %s\n", dlerror());
+      assert(lib != NULL);
+    }
+    void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global");
+    assert(get_dso_global);
+    void **dso_global = get_dso_global();
+    EXPECT_NOT_POISONED(*dso_global);
+    __msan_poison(dso_global, sizeof(*dso_global));
+    EXPECT_POISONED(*dso_global);
+    dlclose(lib);
+  }
+
+  delete[] path;
+}
+
 TEST(MemorySanitizer, scanf) {
   const char *input = "42 hello";
   int* d = new int;
diff --git a/compiler-rt/lib/msandr/msandr.cc b/compiler-rt/lib/msandr/msandr.cc
index 235a1eddd8e4..fee9834ded2d 100644
--- a/compiler-rt/lib/msandr/msandr.cc
+++ b/compiler-rt/lib/msandr/msandr.cc
@@ -37,6 +37,7 @@
 #include
 
 #include
+#include /* for SYS_mmap */
 
 #include
 #include
@@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data_t *info)
 
 int(*__msan_get_retval_tls_offset)();
 int(*__msan_get_param_tls_offset)();
+void (*__msan_unpoison)(void *base, size_t size);
+bool (*__msan_is_in_loader)();
+
+static generic_func_t LookupCallback(module_data_t *app, const char *name) {
+  generic_func_t callback = dr_get_proc_address(app->handle, name);
+  if (callback == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
+    CHECK(callback);
+  }
+  return callback;
+}
 
 void InitializeMSanCallbacks() {
   module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
@@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {
   }
   g_app_path = app->full_path;
 
-  const char *callback_name = "__msan_get_retval_tls_offset";
-  __msan_get_retval_tls_offset =
-      (int(*)()) dr_get_proc_address(app->handle, callback_name);
-  if (__msan_get_retval_tls_offset == NULL) {
-    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
-    CHECK(__msan_get_retval_tls_offset);
-  }
+  __msan_get_retval_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_retval_tls_offset");
+  __msan_get_param_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_param_tls_offset");
+  __msan_unpoison = (void(*)(void *, size_t))
+      LookupCallback(app, "__msan_unpoison");
+  __msan_is_in_loader = (bool (*)())
+      LookupCallback(app, "__msan_is_in_loader");
 
-  callback_name = "__msan_get_param_tls_offset";
-  __msan_get_param_tls_offset =
-      (int(*)()) dr_get_proc_address(app->handle, callback_name);
-  if (__msan_get_param_tls_offset == NULL) {
-    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
-    CHECK(__msan_get_param_tls_offset);
-  }
+  dr_free_module_data(app);
 }
 
-#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
-
 // FIXME: Handle absolute addresses and PC-relative addresses.
 // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
 // a zero base anyway.
@@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
 
   if (arg->pre)
     return true;
-  if (arg->mode != DRSYS_PARAM_OUT)
+  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
     return true;
 
   size_t sz = arg->size;
@@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
               (unsigned long long)(sz & 0xFFFFFFFF));
   }
 
-  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
-  memset(p, 0, sz);
+  if (VERBOSITY > 0) {
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name = NULL;
+    if (drsys_syscall_name(syscall, &name) != DRMF_SUCCESS || name == NULL)
+      name = "<unknown>";  // Never hand dr_printf an unset pointer.
+    dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
+              name, arg->ordinal, arg->start_addr,
+              (char *)arg->start_addr + sz);
+  }
+
+  // We don't switch to the app context because __msan_unpoison() doesn't need
+  // TLS segments.
+  __msan_unpoison(arg->start_addr, sz);
 
   return true; /* keep going */
 }
@@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext, int sysnum) {
   return true;
 }
 
+static bool IsInLoader(void *drcontext) {
+  // TODO: This segment swap is inefficient. DR should just let us query the
+  // app segment base, which it has. Alternatively, if we disable
+  // -mangle_app_seg, then we won't need the swap.
+  bool need_swap = !dr_using_app_state(drcontext);
+  if (need_swap)
+    dr_switch_to_app_state(drcontext);
+  bool is_in_loader = __msan_is_in_loader();
+  if (need_swap)
+    dr_switch_to_dr_state(drcontext);
+  return is_in_loader;
+}
+
 void event_post_syscall(void *drcontext, int sysnum) {
   drsys_syscall_t *syscall;
   drsys_sysnum_t sysnum_full;
@@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext, int sysnum) {
         drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
     CHECK(res == DRMF_SUCCESS);
   }
+
+  // Our normal mmap interceptor can't intercept calls from the loader itself.
+  // This means we don't clear the shadow for calls to dlopen. For now, we
+  // solve this by intercepting mmap from ld.so here, but ideally we'd have a
+  // solution that doesn't rely on msandr.
+  //
+  // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
+  // unpoisoning vast regions of memory and OOMing.
+  // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
+  // does instead of doing a large memset. However, we need the memory to be
+  // zeroed, whereas tsan does not, so plain madvise is not enough.
+  if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
+    if (IsInLoader(drcontext)) {
+      app_pc base = (app_pc)dr_syscall_get_result(drcontext);
+      ptr_uint_t size;
+      drmf_status_t arg_res = drsys_pre_syscall_arg(drcontext, 1, &size);
+      CHECK(arg_res == DRMF_SUCCESS);
+      if (VERBOSITY > 0)
+        dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
+      // We don't switch to the app context because __msan_unpoison() doesn't
+      // need TLS segments.
+      __msan_unpoison(base, size);
+    }
+  }
 }
 
 }  // namespace