// llvm-project/compiler-rt/lib/msandr/msandr.cc
//
// 671 lines
// 22 KiB
// C++

//===-- msandr.cc ---------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemorySanitizer.
//
// DynamoRIO client for MemorySanitizer.
//
// MemorySanitizer requires that all program code is instrumented. Any memory
// store that can turn an uninitialized value into an initialized value must be
// observed by the tool, otherwise we risk reporting a false UMR.
//
// This also includes any libraries that the program depends on.
//
// In the case when rebuilding all program dependencies with MemorySanitizer is
// problematic, an experimental MSanDR tool (the code you are currently looking
// at) can be used. It is a DynamoRIO-based tool that uses dynamic
// instrumentation to
// * Unpoison all memory stores.
// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
// return value shadow on anything that looks like a function call or a return
// from a function.
//
// This tool does not detect the use of uninitialized values in uninstrumented
// libraries. It merely gets rid of false positives by marking all data that
// passes through uninstrumented code as fully initialized.
//===----------------------------------------------------------------------===//
#include <dr_api.h>
#include <drutil.h>
#include <drmgr.h>
#include <drsyscall.h>
#include <sys/mman.h>
#include <algorithm>
#include <string>
#include <set>
#include <vector>
#include <string.h>
using std::string;
// True iff every bit that is set in `mask` is also set in `var`.
#define TESTALL(mask, var) (((mask) & (var)) == (mask))
// True iff at least one bit that is set in `mask` is also set in `var`.
#define TESTANY(mask, var) (((mask) & (var)) != 0)
// Evaluates `condition` exactly once. When it is false, prints the
// stringified condition together with `file` and `line` through dr_printf()
// and then calls dr_abort(). The do/while(0) wrapper makes the expansion
// behave as a single statement. The condition is always evaluated, so code in
// this file may (and does) wrap calls with side effects in CHECK().
#define CHECK_IMPL(condition, file, line) \
do { \
if (!(condition)) { \
dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \
dr_abort(); \
} \
} while (0) // TODO: stacktrace
// CHECK_IMPL bound to the file and line of the call site.
#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
// Compile-time verbosity of the diagnostic dr_printf() output in this file.
// The code tests for `> 0` and `> 1`; with 0 no diagnostics are printed.
#define VERBOSITY 0
namespace {
// Bookkeeping record for one loaded module, filled in from DynamoRIO's
// module_data_t. Records are stored by value in g_module_list, so the
// implicitly generated copy constructor, copy assignment and destructor are
// relied upon.
class ModuleData {
public:
  // Creates an empty record (null bounds/handle, empty path, flags cleared).
  ModuleData();
  // Copies the bounds, path and handle out of `info`. Marked explicit so a
  // module_data_t pointer is never silently converted into a ModuleData.
  explicit ModuleData(const module_data_t *info);

  // Data members are public; the constructors initialize them in this order.
  // First address of the module.
  app_pc start_;
  // End address of the module; lookups treat a pc >= end_ as outside.
  app_pc end_;
  // Full path to the module.
  string path_;
  // DynamoRIO handle of the module, used for symbol lookups.
  module_handle_t handle_;
  // When false, code from this module is not instrumented.
  bool should_instrument_;
  // Set once a basic block from this module has been reported in verbose mode.
  bool executed_;
};
// Full path of the application module; assigned in InitializeMSanCallbacks().
string g_app_path;
// Values returned by the application's __msan_get_retval_tls_offset() and
// __msan_get_param_tls_offset(); stored by dr_init() and used as displacements
// from the FS segment base by InstrumentReturn() / InstrumentIndirectBranch().
int msan_retval_tls_offset;
int msan_param_tls_offset;
// A vector of loaded modules sorted by module bounds. We lookup the current PC
// in here from the bb event. This is better than an rb tree because the lookup
// is faster and the bb event occurs far more than the module load event.
std::vector<ModuleData> g_module_list;
// Builds an empty record: null module bounds and handle, an empty path, and
// both boolean flags cleared.
ModuleData::ModuleData()
    : start_(NULL),
      end_(NULL),
      path_(),
      handle_(NULL),
      should_instrument_(false),
      executed_(false) {}
// Builds a record from DynamoRIO's description of a module.
//   info: module description; must not be null. Its bounds, path and handle
//         are copied, so the record does not depend on `info` afterwards.
ModuleData::ModuleData(const module_data_t *info)
    : start_(info->start),
      end_(info->end),
      // Constructing a std::string from a null pointer is undefined behaviour,
      // so a module without a path is recorded with an empty one.
      path_(info->full_path != NULL ? info->full_path : ""),
      handle_(info->handle),
      // We'll check the black/white lists later and adjust this.
      should_instrument_(true),
      executed_(false) {}
// Pointers to the functions exported by the application under the names
// `__msan_get_retval_tls_offset` and `__msan_get_param_tls_offset`. They are
// resolved with dr_get_proc_address() in InitializeMSanCallbacks() and called
// once from dr_init().
int(*__msan_get_retval_tls_offset)();
int(*__msan_get_param_tls_offset)();
void InitializeMSanCallbacks() {
module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
if (!app) {
dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
dr_get_application_name());
CHECK(app);
}
g_app_path = app->full_path;
const char *callback_name = "__msan_get_retval_tls_offset";
__msan_get_retval_tls_offset =
(int(*)()) dr_get_proc_address(app->handle, callback_name);
if (__msan_get_retval_tls_offset == NULL) {
dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
CHECK(__msan_get_retval_tls_offset);
}
callback_name = "__msan_get_param_tls_offset";
__msan_get_param_tls_offset =
(int(*)()) dr_get_proc_address(app->handle, callback_name);
if (__msan_get_param_tls_offset == NULL) {
dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
CHECK(__msan_get_param_tls_offset);
}
}
// Converts an application address to the address of its shadow by clearing
// bit 46 (0x400000000000). `mem` must be an integer value; callers cast
// pointers to an integer type before using the macro.
#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
// Decides whether a destination operand is one this tool unpoisons: it must be
// a base+displacement memory operand that is not addressed through FS or GS.
// FIXME: Handle absolute addresses and PC-relative addresses.
// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
// a zero base anyway.
bool OperandIsInteresting(opnd_t opnd) {
  if (!opnd_is_base_disp(opnd))
    return false;
  if (opnd_get_segment(opnd) == DR_SEG_FS)
    return false;
  return opnd_get_segment(opnd) != DR_SEG_GS;
}
// Returns true when `instr` writes memory through at least one destination
// operand accepted by OperandIsInteresting(). Labels and the explicitly
// excluded opcodes are never instrumented.
bool WantToInstrument(instr_t *instr) {
  // TODO: skip push instructions?
  // FIXME: support the instructions excluded below:
  // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
  if (instr_get_opcode(instr) == OP_rep_cmps)
    return false;

  // Labels appear due to drutil_expand_rep_string()
  if (instr_is_label(instr))
    return false;

  CHECK(instr_ok_to_mangle(instr) == true);

  if (!instr_writes_memory(instr))
    return false;

  const int num_dsts = instr_num_dsts(instr);
  for (int idx = 0; idx < num_dsts; ++idx) {
    if (OperandIsInteresting(instr_get_dst(instr, idx)))
      return true;
  }
  return false;
}
// Shorthands for instrlist_meta_preinsert(): both insert an instruction into
// the list named `bb` (which must exist in the calling scope) before `at`.
// PRE pastes `INSTR_CREATE_` in front of `what`, so `what` is written as e.g.
// `mov_st(drcontext, dst, src)`; PREF takes an already complete instruction
// expression. NOTE: both expansions already end in ';'.
#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
// Emits, immediately before `instr`, code that zeroes the shadow bytes that
// correspond to the memory operand `op`:
//   1. when needed, preserve the arithmetic flags (kept in SPILL_SLOT_3);
//   2. spill two scratch registers R1 and R2 (SPILL_SLOT_1 / SPILL_SLOT_2);
//   3. get the application address into R1 and AND it with
//      0xffffbfffffffffff (the same bit that MEM_TO_SHADOW clears);
//   4. store zeros over the shadow bytes;
//   5. restore R1, R2 and, if they were saved, the flags.
// `is_write` is not read by the current implementation.
void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
bool is_write) {
bool need_to_restore_eflags = false;
uint flags = instr_get_arith_flags(instr);
// TODO: do something smarter with flags and spills in general?
// For example, spill them only once for a sequence of instrumented
// instructions that don't change/read flags.
// The flags are preserved unless `instr` writes all of EFLAGS_WRITE_6 and
// reads none of EFLAGS_READ_6 (the inserted `and` below modifies flags).
if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
if (VERBOSITY > 1)
dr_printf("Spilling eflags...\n");
need_to_restore_eflags = true;
// TODO: Maybe sometimes don't need to 'seto'.
// TODO: Maybe sometimes don't want to spill XAX here?
// TODO: No need to spill XAX here if XAX is not used in the BB.
// XAX is parked in SPILL_SLOT_1, the flags are moved through XAX into
// SPILL_SLOT_3, then XAX gets its application value back.
dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
dr_save_arith_flags_to_xax(drcontext, bb, instr);
dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
}
#if 0
dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
opnd_is_memory_reference(op), opnd_is_base_disp(op),
opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
#endif
// R1 will hold the (shadow) address; R2 is a second scratch register.
reg_id_t R1;
bool address_in_R1 = false;
if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
opnd_get_disp(op) == 0) {
// If this is a simple access with no offset or index, we can just use the
// base for R1.
address_in_R1 = true;
R1 = opnd_get_base(op);
} else {
// Otherwise, we need to compute the addr into R1.
// TODO: reuse some spare register? e.g. r15 on x64
// TODO: might be used as a non-mem-ref register?
R1 = DR_REG_XAX;
}
CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
// Pick R2 that's not R1 or used by the operand. It's OK if the instr uses
// R2 elsewhere, since we'll restore it before instr.
reg_id_t GPR_TO_USE_FOR_R2[] = {
DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX
// Don't forget to update the +4 below if you add anything else!
};
std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4);
unused_registers.erase(R1);
for (int j = 0; j < opnd_num_regs_used(op); j++) {
unused_registers.erase(opnd_get_reg_used(op, j));
}
CHECK(unused_registers.size() > 0);
reg_id_t R2 = *unused_registers.begin();
CHECK(R1 != R2);
// Save the current values of R1 and R2.
dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
// TODO: Something smarter than spilling a "fixed" register R2?
dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
// Compute the operand's effective address into R1 unless R1 is the base
// register and therefore already holds it.
if (!address_in_R1)
CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
// R1 &= 0xffffbfffffffffff: clears bit 46, turning the application address
// into the shadow address. The mask goes through R2.
PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
OPND_CREATE_INT64(0xffffbfffffffffff)));
PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
// There is no mov_st of a 64-bit immediate, so...
opnd_size_t op_size = opnd_get_size(op);
CHECK(op_size != OPSZ_NA);
uint access_size = opnd_size_in_bytes(op_size);
if (access_size <= 4) {
// Accesses of up to 4 bytes: a single zero store of the operand's own size.
PRE(instr,
mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
opnd_create_immed_int((ptr_int_t) 0, op_size)));
} else {
// Larger accesses: a sequence of 4-byte zero stores at offsets 0, 4, 8, ...
// When access_size is not a multiple of 4 the last store reaches past
// access_size bytes.
// FIXME: tail?
for (uint ofs = 0; ofs < access_size; ofs += 4) {
PRE(instr,
mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0)));
}
}
// Restore the registers and flags.
dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
if (need_to_restore_eflags) {
if (VERBOSITY > 1)
dr_printf("Restoring eflags\n");
// TODO: Check if it's reverse to the dr_restore_reg above and optimize.
// Mirror of the spill sequence: park XAX in SPILL_SLOT_1, reload the saved
// flags from SPILL_SLOT_3 through XAX, then give XAX its value back.
dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
dr_restore_arith_flags_from_xax(drcontext, bb, instr);
dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
}
// The original instruction is left untouched. The above instrumentation is just
// a prefix.
}
// Emits, in front of the return instruction `instr`, code that stores zero to
// the 64-bit slot at msan_retval_tls_offset from the FS segment base. XAX is
// used as scratch and is preserved through SPILL_SLOT_1. The return
// instruction itself is not modified; the inserted code is only a prefix.
void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
  // Free up XAX for the segment base.
  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);

  // Clobbers nothing except xax.
  const bool res =
      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
  CHECK(res);

  // TODO: unpoison more bytes?
  PRE(instr,
      mov_st(drcontext,
             OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
             OPND_CREATE_INT32(0)));

  // Give XAX its application value back.
  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
}
// Emits, in front of the indirect call/jump `instr`, code that stores zero to
// six consecutive pointer-sized slots starting at msan_param_tls_offset from
// the FS segment base. XAX is used as scratch and is preserved through
// SPILL_SLOT_1. The branch itself is not modified; the inserted code is only a
// prefix.
void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
                              instr_t *instr) {
  // Number of pointer-sized parameter slots that get cleared.
  const int kNumParamSlots = 6;

  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);

  // Clobbers nothing except xax.
  const bool res =
      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
  CHECK(res);

  // TODO: unpoison more bytes?
  for (int slot = 0; slot < kNumParamSlots; ++slot) {
    PRE(instr,
        mov_st(drcontext,
               OPND_CREATE_MEMPTR(DR_REG_XAX,
                                  msan_param_tls_offset +
                                      slot * sizeof(void *)),
               OPND_CREATE_INT32(0)));
  }

  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
}
// Strict ordering of modules by start address, used by the binary searches
// over g_module_list. Modules shouldn't overlap, so end_ is not consulted; an
// application where they can overlap is not supported.
bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
  return right.start_ > left.start_;
}
// Look up the module containing PC. Should be relatively fast, as it's called
// for each bb instrumentation.
// Returns a pointer into g_module_list, or NULL when `pc` lies in no known
// module (including when no module has been recorded yet). The pointer is
// only valid until g_module_list is next modified.
ModuleData *LookupModuleByPC(app_pc pc) {
  ModuleData fake_mod_data;
  fake_mod_data.start_ = pc;
  // First module whose start address is >= pc.
  std::vector<ModuleData>::iterator it =
      std::lower_bound(g_module_list.begin(), g_module_list.end(),
                       fake_mod_data, ModuleDataCompareStart);
  // Unless pc is exactly a module's start address, the only candidate is the
  // module just before `it`.
  if (it == g_module_list.end() || pc < it->start_) {
    // Nothing starts at or below pc (this also covers an empty list).
    // Stepping back from begin() would be undefined behaviour.
    if (it == g_module_list.begin())
      return NULL;
    --it;
  }
  CHECK(it->start_ <= pc);
  if (pc >= it->end_) {
    // We're past the end of this module. We shouldn't be in the next module,
    // or lower_bound lied to us.
    ++it;
    CHECK(it == g_module_list.end() || pc < it->start_);
    return NULL;
  }
  // OK, we found the module.
  return &*it;
}
// Policy for code that belongs to no known module: it is always instrumented.
bool ShouldInstrumentNonModuleCode() {
  const bool instrument_non_module_code = true;
  return instrument_non_module_code;
}
// Returns true when the module does not export `__msan_track_origins`.
// NOTE(review): presumably that symbol marks a module already instrumented by
// the compiler -- confirm against the MSan runtime.
bool ShouldInstrumentModule(ModuleData *mod_data) {
  // TODO(rnk): Flags for blacklist would get wired in here.
  return dr_get_proc_address(mod_data->handle_, "__msan_track_origins") ==
         NULL;
}
// Decides whether code at `pc` is to be instrumented.
//   pc:        application address being examined.
//   pmod_data: optional out-parameter; when non-null it receives the module
//              containing `pc`, or NULL if there is none.
// Code inside a module follows that module's should_instrument_ flag; code
// outside any module follows ShouldInstrumentNonModuleCode().
bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
  ModuleData *const module = LookupModuleByPC(pc);
  if (pmod_data != NULL)
    *pmod_data = module;

  if (module == NULL)
    return ShouldInstrumentNonModuleCode();

  // A cleared flag means this module is on a blacklist.
  return module->should_instrument_;
}
// TODO(rnk): Make sure we instrument after __msan_init.
// app2app stage of basic-block processing: for blocks that will be
// instrumented, expands rep-string instructions via drutil so that the later
// stage sees them in expanded form. Always returns DR_EMIT_PERSISTABLE.
dr_emit_flags_t
event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
                          bool for_trace, bool translating) {
  if (!ShouldInstrumentPc(dr_fragment_app_pc(tag), NULL))
    return DR_EMIT_PERSISTABLE;

  CHECK(drutil_expand_rep_string(drcontext, bb));
  return DR_EMIT_PERSISTABLE;
}
// Main instrumentation pass over one basic block. For blocks that pass
// ShouldInstrumentPc() it walks every instruction and
//   * prefixes returns with InstrumentReturn(),
//   * prefixes indirect calls/jumps with InstrumentIndirectBranch(),
//   * prefixes memory writes accepted by WantToInstrument() with
//     InstrumentMops() for their first interesting destination operand.
// Diagnostic output is controlled by VERBOSITY. Always returns
// DR_EMIT_PERSISTABLE.
dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
                                  bool for_trace, bool translating) {
  app_pc pc = dr_fragment_app_pc(tag);
  ModuleData *mod_data;
  if (!ShouldInstrumentPc(pc, &mod_data))
    return DR_EMIT_PERSISTABLE;

  if (VERBOSITY > 1)
    dr_printf("============================================================\n");

  if (VERBOSITY > 0) {
    string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>");
    if (mod_data && !mod_data->executed_) {
      mod_data->executed_ = true; // Nevermind this race.
      dr_printf("Executing from new module: %s\n", mod_path.c_str());
    }
    const char *translating_str = translating ? "true" : "false";
    dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
              mod_path.c_str(), translating_str);
    if (mod_data) {
      // Match standard sanitizer trace format for free symbols.
      // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
      dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
                pc - mod_data->start_);
    }
  }

  if (VERBOSITY > 1) {
    instrlist_disassemble(drcontext, pc, bb, STDOUT);
    for (instr_t *cur = instrlist_first(bb); cur; cur = instr_get_next(cur))
      dr_printf("opcode: %d\n", instr_get_opcode(cur));
  }

  for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
    const int opcode = instr_get_opcode(i);

    if (opcode == OP_ret || opcode == OP_ret_far) {
      InstrumentReturn(drcontext, bb, i);
    } else if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
               opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
      // These instructions hopefully cover all cases where control is
      // transferred to a function in a different module (we only care about
      // calls into compiler-instrumented modules).
      // * call_ind is used for normal indirect calls.
      // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
      //   stub includes a jump to an address from GOT).
      InstrumentIndirectBranch(drcontext, bb, i);
    } else if (WantToInstrument(i)) {
      if (VERBOSITY > 1) {
        app_pc orig_pc = dr_fragment_app_pc(tag);
        uint flags = instr_get_arith_flags(i);
        dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
                  instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
      }
      if (instr_writes_memory(i)) {
        // Instrument memory writes
        for (int d = 0; d < instr_num_dsts(i); d++) {
          opnd_t op = instr_get_dst(i, d);
          if (OperandIsInteresting(op)) {
            InstrumentMops(drcontext, bb, i, op, true);
            break; // only instrumenting the first dst
          }
        }
      }
    }
  }

  // TODO: optimize away redundant restore-spill pairs?
  if (VERBOSITY > 1) {
    pc = dr_fragment_app_pc(tag);
    dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
    instrlist_disassemble(drcontext, pc, bb, STDOUT);
  }
  return DR_EMIT_PERSISTABLE;
}
void event_module_load(void *drcontext, const module_data_t *info,
bool loaded) {
// Insert the module into the list while maintaining the ordering.
ModuleData mod_data(info);
std::vector<ModuleData>::iterator it =
upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
ModuleDataCompareStart);
it = g_module_list.insert(it, mod_data);
// Check if we should instrument this module.
it->should_instrument_ = ShouldInstrumentModule(&*it);
dr_module_set_should_instrument(info->handle, it->should_instrument_);
if (VERBOSITY > 0)
dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
info->full_path, info->start, info->end,
it->should_instrument_ ? "on" : "off");
}
void event_module_unload(void *drcontext, const module_data_t *info) {
if (VERBOSITY > 0)
dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
info->start, info->end);
// Remove the module from the list.
ModuleData mod_data(info);
std::vector<ModuleData>::iterator it =
lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
ModuleDataCompareStart);
// It's a bug if we didn't actually find the module.
CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
g_module_list.erase(it);
}
// Process-exit callback; only prints a completion banner in verbose builds.
void event_exit() {
  if (VERBOSITY <= 0)
    return;
  dr_printf("==DRMSAN== DONE\n");
}
// Syscall filter callback. Returns true for every system call number, i.e.
// nothing is filtered out.
bool event_filter_syscall(void *drcontext, int sysnum) {
  // FIXME: only intercept syscalls with memory effects.
  const bool intercept = true; /* intercept everything */
  return intercept;
}
// drsys_iterate_memargs() callback: after a system call, zeroes the shadow of
// every memory region the call wrote to.
//   arg:       description of one memory argument of the current syscall.
//   user_data: the current drsys_syscall_t (only used to name the syscall in
//              the sanity message); may be NULL.
// Returns true so that the iteration always continues.
bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
  CHECK(arg->valid);

  // Nothing has been written yet before the syscall runs.
  if (arg->pre)
    return true;
  // arg->mode is a set of bit flags, so an in-out argument carries
  // DRSYS_PARAM_OUT together with DRSYS_PARAM_IN. Test the bit rather than
  // comparing for equality, otherwise in-out regions would be skipped.
  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
    return true;

  size_t sz = arg->size;
  if (sz > 0xFFFFFFFF) {
    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
    const char *name = "<unknown>";
    if (syscall != NULL) {
      drmf_status_t res = drsys_syscall_name(syscall, &name);
      CHECK(res == DRMF_SUCCESS);
    }
    const size_t clipped = sz & 0xFFFFFFFF;
    dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
              " Clipping to %llu.\n",
              name, arg->ordinal, (unsigned long long) sz,
              (unsigned long long) clipped);
    // Actually apply the clipping that the message announces.
    sz = clipped;
  }

  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
  memset(p, 0, sz);
  return true; /* keep going */
}
// Pre-syscall callback. Queries drsyscall for the current system call, checks
// that the reported number, name and return type are consistent, and runs the
// memory-argument iteration (the callback does nothing for pre-syscall
// arguments). Always returns true.
bool event_pre_syscall(void *drcontext, int sysnum) {
  drmf_status_t res;

  drsys_syscall_t *syscall;
  res = drsys_cur_syscall(drcontext, &syscall);
  CHECK(res == DRMF_SUCCESS);

  drsys_sysnum_t sysnum_full;
  res = drsys_syscall_number(syscall, &sysnum_full);
  CHECK(res == DRMF_SUCCESS);
  CHECK(sysnum == sysnum_full.number);

  bool known;
  res = drsys_syscall_is_known(syscall, &known);
  CHECK(res == DRMF_SUCCESS);

  const char *name;
  res = drsys_syscall_name(syscall, &name);
  CHECK(res == DRMF_SUCCESS);

  drsys_param_type_t ret_type;
  res = drsys_syscall_return_type(syscall, &ret_type);
  CHECK(res == DRMF_SUCCESS);
  CHECK(ret_type != DRSYS_TYPE_INVALID);
  CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);

  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
  CHECK(res == DRMF_SUCCESS);

  return true;
}
// Post-syscall callback. When drsyscall reports that the system call
// succeeded, iterates over its memory arguments with drsys_iter_memarg_cb,
// passing the syscall descriptor as user data.
void event_post_syscall(void *drcontext, int sysnum) {
  drmf_status_t res;

  drsys_syscall_t *syscall;
  res = drsys_cur_syscall(drcontext, &syscall);
  CHECK(res == DRMF_SUCCESS);

  drsys_sysnum_t sysnum_full;
  res = drsys_syscall_number(syscall, &sysnum_full);
  CHECK(res == DRMF_SUCCESS);
  CHECK(sysnum == sysnum_full.number);

  bool success = false;
  res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
                                &success);
  CHECK(res == DRMF_SUCCESS);

  // A failed syscall is not followed by the memory-argument iteration.
  if (!success)
    return;

  res =
      drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
  CHECK(res == DRMF_SUCCESS);
}
} // namespace
// Client entry point called by DynamoRIO. Initializes drmgr/drutil, bails out
// early for a fixed list of application names, then sets up drsyscall, reads
// the MSan TLS offsets from the application and registers all event
// callbacks.
DR_EXPORT void dr_init(client_id_t id) {
  drmgr_init();
  drutil_init();

  // This blacklist will still run these apps through DR's code cache. On the
  // other hand, we are able to follow children of these apps.
  // FIXME: Once DR has detach, we could just detach here. Alternatively,
  // if DR had a fork or exec hook to let us decide there, that would be nice.
  // FIXME: make the blacklist cmd-adjustable.
  static const char *const kSkippedApps[] = {
    "python", "python2.7", "bash", "sh", "true", "exit", "yes", "echo"
  };
  const string app_name = dr_get_application_name();
  const size_t num_skipped = sizeof(kSkippedApps) / sizeof(kSkippedApps[0]);
  for (size_t idx = 0; idx < num_skipped; ++idx) {
    if (app_name == kSkippedApps[idx])
      return;
  }

  // drsyscall setup: intercept every system call.
  drmf_status_t res;
  drsys_options_t ops;
  memset(&ops, 0, sizeof(ops));
  ops.struct_size = sizeof(ops);
  ops.analyze_unknown_syscalls = false;
  res = drsys_init(id, &ops);
  CHECK(res == DRMF_SUCCESS);

  dr_register_filter_syscall_event(event_filter_syscall);
  drmgr_register_pre_syscall_event(event_pre_syscall);
  drmgr_register_post_syscall_event(event_post_syscall);
  res = drsys_filter_all_syscalls();
  CHECK(res == DRMF_SUCCESS);

  InitializeMSanCallbacks();

  // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
  // functions. This may change one day.
  // TODO: make this more robust.
  // The two callbacks live in the application, so they are invoked with the
  // application state switched in.
  void *drcontext = dr_get_current_drcontext();
  dr_switch_to_app_state(drcontext);
  msan_retval_tls_offset = __msan_get_retval_tls_offset();
  msan_param_tls_offset = __msan_get_param_tls_offset();
  dr_switch_to_dr_state(drcontext);

  if (VERBOSITY > 0) {
    dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
    dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
  }

  // Standard DR events.
  dr_register_exit_event(event_exit);

  drmgr_priority_t priority = {
    sizeof(priority), /* size of struct */
    "msandr",         /* name of our operation */
    NULL,             /* optional name of operation we should precede */
    NULL,             /* optional name of operation we should follow */
    0                 /* numeric priority */
  };
  drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
  drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
  drmgr_register_module_load_event(event_module_load);
  drmgr_register_module_unload_event(event_module_unload);

  if (VERBOSITY > 0)
    dr_printf("==MSANDR== Starting!\n");
}