From 59886f3c1a5253212f06912ac707337cba6d5525 Mon Sep 17 00:00:00 2001 From: Lorenzo Martignoni Date: Thu, 9 Apr 2015 11:42:33 +0000 Subject: [PATCH] Differential Revision: http://reviews.llvm.org/D7249 llvm-svn: 234477 --- compiler-rt/lib/dfsan/dfsan.cc | 15 +- compiler-rt/lib/dfsan/dfsan.h | 1 + compiler-rt/lib/dfsan/dfsan_custom.cc | 407 ++++++++++++-------------- compiler-rt/test/dfsan/custom.cc | 5 + 4 files changed, 211 insertions(+), 217 deletions(-) diff --git a/compiler-rt/lib/dfsan/dfsan.cc b/compiler-rt/lib/dfsan/dfsan.cc index 5a8da38675f3..1b0dc8fc3b7b 100644 --- a/compiler-rt/lib/dfsan/dfsan.cc +++ b/compiler-rt/lib/dfsan/dfsan.cc @@ -24,6 +24,7 @@ #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_flag_parser.h" #include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_internal_defs.h" #include "dfsan/dfsan.h" @@ -110,8 +111,7 @@ static void dfsan_check_label(dfsan_label label) { // Resolves the union of two unequal labels. Nonequality is a precondition for // this function (the instrumentation pass inlines the equality test). -extern "C" SANITIZER_INTERFACE_ATTRIBUTE -dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) { +dfsan_label __dfsan_union_internal(dfsan_label l1, dfsan_label l2, uptr pc) { DCHECK_NE(l1, l2); if (l1 == 0) @@ -143,6 +143,7 @@ dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) { dfsan_check_label(label); __dfsan_label_info[label].l1 = l1; __dfsan_label_info[label].l2 = l2; + __dfsan_label_info[label].pc = pc; } atomic_store(table_ent, label, memory_order_release); } else if (label == kInitializingLabel) { @@ -155,13 +156,18 @@ dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) { return label; } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE +dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) { + return __dfsan_union_internal(l1, l2, GET_CALLER_PC()); +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) { dfsan_label label = ls[0]; for (uptr i = 1; i != n; ++i) { dfsan_label next_label = ls[i]; if (label != next_label) - label = __dfsan_union(label, next_label); + label = __dfsan_union_internal(label, next_label, GET_CALLER_PC()); } return label; } @@ -196,7 +202,7 @@ SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2) { if (l1 == l2) return l1; - return __dfsan_union(l1, l2); + return __dfsan_union_internal(l1, l2, GET_CALLER_PC()); } extern "C" SANITIZER_INTERFACE_ATTRIBUTE @@ -206,6 +212,7 @@ dfsan_label dfsan_create_label(const char *desc, void *userdata) { dfsan_check_label(label); __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0; __dfsan_label_info[label].desc = desc; + __dfsan_label_info[label].pc = GET_CALLER_PC(); __dfsan_label_info[label].userdata = userdata; return label; } diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h index ceba3533a233..4b374d516123 100644 --- a/compiler-rt/lib/dfsan/dfsan.h +++ b/compiler-rt/lib/dfsan/dfsan.h @@ -24,6 +24,7 @@ struct dfsan_label_info { dfsan_label l1; dfsan_label l2; const char *desc; + uptr pc; void *userdata; }; diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cc b/compiler-rt/lib/dfsan/dfsan_custom.cc index 318ecd6fb317..35f6fb4cd7ba 100644 --- a/compiler-rt/lib/dfsan/dfsan_custom.cc +++ b/compiler-rt/lib/dfsan/dfsan_custom.cc @@ -847,266 +847,247 @@ __dfsw_write(int fd, const void *buf, size_t count, *ret_label = 0; return write(fd, buf, count); } +} // Type used to extract a dfsan_label with va_arg() typedef int dfsan_label_va; -// A chunk of data representing the output of formatting either a constant -// string or a single format directive. -struct Chunk { - // Address of the beginning of the formatted string - const char *ptr; - // Size of the formatted string +// Formats a chunk either a constant string or a single format directive (e.g., +// '%.3f'). +struct Formatter { + Formatter(char *str_, const char *fmt_, size_t size_) + : str(str_), str_off(0), size(size_), fmt_start(fmt_), fmt_cur(fmt_), + width(-1) {} + + int format() { + char *tmp_fmt = build_format_string(); + int retval = + snprintf(str + str_off, str_off < size ? size - str_off : 0, tmp_fmt, + 0 /* used only to avoid warnings */); + free(tmp_fmt); + return retval; + } + + template int format(T arg) { + char *tmp_fmt = build_format_string(); + int retval; + if (width >= 0) { + retval = snprintf(str + str_off, str_off < size ? size - str_off : 0, + tmp_fmt, width, arg); + } else { + retval = snprintf(str + str_off, str_off < size ? size - str_off : 0, + tmp_fmt, arg); + } + free(tmp_fmt); + return retval; + } + + char *build_format_string() { + size_t fmt_size = fmt_cur - fmt_start + 1; + char *new_fmt = (char *)malloc(fmt_size + 1); + assert(new_fmt); + internal_memcpy(new_fmt, fmt_start, fmt_size); + new_fmt[fmt_size] = '\0'; + return new_fmt; + } + + char *str_cur() { return str + str_off; } + + size_t num_written_bytes(int retval) { + if (retval < 0) { + return 0; + } + + size_t num_avail = str_off < size ? size - str_off : 0; + if (num_avail == 0) { + return 0; + } + + size_t num_written = retval; + // A return value of {v,}snprintf of size or more means that the output was + // truncated. + if (num_written >= num_avail) { + num_written -= num_avail; + } + + return num_written; + } + + char *str; + size_t str_off; size_t size; - - // Type of DFSan label (depends on the format directive) - enum { - // Constant string, no argument and thus no label - NONE = 0, - // Label for an argument of '%n' - IGNORED, - // Label for a '%s' argument - STRING, - // Label for any other type of argument - NUMERIC, - } label_type; - - // Value of the argument (if label_type == STRING) - const char *arg; + const char *fmt_start; + const char *fmt_cur; + int width; }; -// Formats the input. The output is stored in 'str' starting from offset -// 'off'. The format directive is represented by the first 'format_size' bytes -// of 'format'. If 'has_size' is true, 'size' bounds the number of output -// bytes. Returns the return value of the vsnprintf call used to format the -// input. -static int format_chunk(char *str, size_t off, bool has_size, size_t size, - const char *format, size_t format_size, ...) { - char *chunk_format = (char *) malloc(format_size + 1); - assert(chunk_format); - internal_memcpy(chunk_format, format, format_size); - chunk_format[format_size] = '\0'; - - va_list ap; - va_start(ap, format_size); - int r = 0; - if (has_size) { - r = vsnprintf(str + off, off < size ? size - off : 0, chunk_format, ap); - } else { - r = vsprintf(str + off, chunk_format, ap); - } - va_end(ap); - - free(chunk_format); - return r; -} - // Formats the input and propagates the input labels to the output. The output -// is stored in 'str'. If 'has_size' is true, 'size' bounds the number of -// output bytes. 'format' and 'ap' are the format string and the list of -// arguments for formatting. Returns the return value vsnprintf would return. +// is stored in 'str'. 'size' bounds the number of output bytes. 'format' and +// 'ap' are the format string and the list of arguments for formatting. Returns +// the return value vsnprintf would return. // // The function tokenizes the format string in chunks representing either a // constant string or a single format directive (e.g., '%.3f') and formats each // chunk independently into the output string. This approach allows to figure // out which bytes of the output string depends on which argument and thus to // propagate labels more precisely. -static int format_buffer(char *str, bool has_size, size_t size, - const char *format, dfsan_label *va_labels, - dfsan_label *ret_label, va_list ap) { - InternalMmapVector chunks(8); - size_t off = 0; +// +// WARNING: This implementation does not support conversion specifiers with +// positional arguments. +static int format_buffer(char *str, size_t size, const char *fmt, + dfsan_label *va_labels, dfsan_label *ret_label, + va_list ap) { + Formatter formatter(str, fmt, size); - while (*format) { - chunks.push_back(Chunk()); - Chunk& chunk = chunks.back(); - chunk.ptr = str + off; - chunk.arg = nullptr; + while (*formatter.fmt_cur) { + formatter.fmt_start = formatter.fmt_cur; + formatter.width = -1; + int retval = 0; - int status = 0; - - if (*format != '%') { + if (*formatter.fmt_cur != '%') { // Ordinary character. Consume all the characters until a '%' or the end // of the string. - size_t format_size = 0; - for (; *format && *format != '%'; ++format, ++format_size) {} - status = format_chunk(str, off, has_size, size, format - format_size, - format_size); - chunk.label_type = Chunk::NONE; + for (; *(formatter.fmt_cur + 1) && *(formatter.fmt_cur + 1) != '%'; + ++formatter.fmt_cur) {} + retval = formatter.format(); + dfsan_set_label(0, formatter.str_cur(), + formatter.num_written_bytes(retval)); } else { // Conversion directive. Consume all the characters until a conversion // specifier or the end of the string. - bool end_format = false; -#define FORMAT_CHUNK(t) \ - format_chunk(str, off, has_size, size, format - format_size, \ - format_size + 1, va_arg(ap, t)) - - for (size_t format_size = 1; *++format && !end_format; ++format_size) { - switch (*format) { - case 'd': - case 'i': - case 'o': - case 'u': - case 'x': - case 'X': - switch (*(format - 1)) { - case 'h': - // Also covers the 'hh' case (since the size of the arg is still - // an int). - status = FORMAT_CHUNK(int); - break; - case 'l': - if (format_size >= 2 && *(format - 2) == 'l') { - status = FORMAT_CHUNK(long long int); - } else { - status = FORMAT_CHUNK(long int); - } - break; - case 'q': - status = FORMAT_CHUNK(long long int); - break; - case 'j': - status = FORMAT_CHUNK(intmax_t); - break; - case 'z': - status = FORMAT_CHUNK(size_t); - break; - case 't': - status = FORMAT_CHUNK(size_t); - break; - default: - status = FORMAT_CHUNK(int); - } - chunk.label_type = Chunk::NUMERIC; - end_format = true; + bool end_fmt = false; + for (; *formatter.fmt_cur && !end_fmt; ) { + switch (*++formatter.fmt_cur) { + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + switch (*(formatter.fmt_cur - 1)) { + case 'h': + // Also covers the 'hh' case (since the size of the arg is still + // an int). + retval = formatter.format(va_arg(ap, int)); break; - - case 'a': - case 'A': - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - if (*(format - 1) == 'L') { - status = FORMAT_CHUNK(long double); + case 'l': + if (formatter.fmt_cur - formatter.fmt_start >= 2 && + *(formatter.fmt_cur - 2) == 'l') { + retval = formatter.format(va_arg(ap, long long int)); } else { - status = FORMAT_CHUNK(double); + retval = formatter.format(va_arg(ap, long int)); } - chunk.label_type = Chunk::NUMERIC; - end_format = true; break; - - case 'c': - status = FORMAT_CHUNK(int); - chunk.label_type = Chunk::NUMERIC; - end_format = true; + case 'q': + retval = formatter.format(va_arg(ap, long long int)); break; - - case 's': - chunk.arg = va_arg(ap, char *); - status = - format_chunk(str, off, has_size, size, - format - format_size, format_size + 1, - chunk.arg); - chunk.label_type = Chunk::STRING; - end_format = true; + case 'j': + retval = formatter.format(va_arg(ap, intmax_t)); break; - - case 'p': - status = FORMAT_CHUNK(void *); - chunk.label_type = Chunk::NUMERIC; - end_format = true; + case 'z': + case 't': + retval = formatter.format(va_arg(ap, size_t)); break; - - case 'n': - *(va_arg(ap, int *)) = (int)off; - chunk.label_type = Chunk::IGNORED; - end_format = true; - break; - - case '%': - status = format_chunk(str, off, has_size, size, - format - format_size, format_size + 1); - chunk.label_type = Chunk::NONE; - end_format = true; - break; - default: - break; + retval = formatter.format(va_arg(ap, int)); + } + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + if (*(formatter.fmt_cur - 1) == 'L') { + retval = formatter.format(va_arg(ap, long double)); + } else { + retval = formatter.format(va_arg(ap, double)); + } + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + + case 'c': + retval = formatter.format(va_arg(ap, int)); + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + + case 's': { + char *arg = va_arg(ap, char *); + retval = formatter.format(arg); + va_labels++; + internal_memcpy(shadow_for(formatter.str_cur()), shadow_for(arg), + sizeof(dfsan_label) * + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + } + + case 'p': + retval = formatter.format(va_arg(ap, void *)); + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + + case 'n': { + int *ptr = va_arg(ap, int *); + *ptr = (int)formatter.str_off; + va_labels++; + dfsan_set_label(0, ptr, sizeof(ptr)); + end_fmt = true; + break; + } + + case '%': + retval = formatter.format(); + dfsan_set_label(0, formatter.str_cur(), + formatter.num_written_bytes(retval)); + end_fmt = true; + break; + + case '*': + formatter.width = va_arg(ap, int); + va_labels++; + break; + + default: + break; } } -#undef FORMAT_CHUNK } - if (status < 0) { - return status; + if (retval < 0) { + return retval; } - // A return value of {v,}snprintf of size or more means that the output was - // truncated. - if (has_size) { - if (off < size) { - size_t ustatus = (size_t) status; - chunk.size = ustatus >= (size - off) ? - ustatus - (size - off) : ustatus; - } else { - chunk.size = 0; - } - } else { - chunk.size = status; - } - off += status; - } - - // TODO(martignlo): Decide how to combine labels (e.g., whether to ignore or - // not the label of the format string). - - // Label each output chunk according to the label supplied as argument to the - // function. We need to go through all the chunks and arguments even if the - // string was only partially printed ({v,}snprintf case). - for (size_t i = 0; i < chunks.size(); ++i) { - const Chunk& chunk = chunks[i]; - void *chunk_ptr = const_cast(chunk.ptr); - - switch (chunk.label_type) { - case Chunk::NONE: - dfsan_set_label(0, chunk_ptr, chunk.size); - break; - case Chunk::IGNORED: - va_labels++; - dfsan_set_label(0, chunk_ptr, chunk.size); - break; - case Chunk::NUMERIC: { - dfsan_label label = *va_labels++; - dfsan_set_label(label, chunk_ptr, chunk.size); - break; - } - case Chunk::STRING: { - // Consume the label of the pointer to the string - va_labels++; - internal_memcpy(shadow_for(chunk_ptr), - shadow_for(chunk.arg), - sizeof(dfsan_label) * (strlen(chunk.arg))); - break; - } - } + formatter.fmt_cur++; + formatter.str_off += retval; } *ret_label = 0; // Number of bytes written in total. - return off; + return formatter.str_off; } +extern "C" { SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_sprintf(char *str, const char *format, dfsan_label str_label, dfsan_label format_label, dfsan_label *va_labels, dfsan_label *ret_label, ...) { va_list ap; va_start(ap, ret_label); - int ret = format_buffer(str, false, 0, format, va_labels, ret_label, ap); + int ret = format_buffer(str, ~0ul, format, va_labels, ret_label, ap); va_end(ap); return ret; } @@ -1118,7 +1099,7 @@ int __dfsw_snprintf(char *str, size_t size, const char *format, dfsan_label *ret_label, ...) { va_list ap; va_start(ap, ret_label); - int ret = format_buffer(str, true, size, format, va_labels, ret_label, ap); + int ret = format_buffer(str, size, format, va_labels, ret_label, ap); va_end(ap); return ret; } diff --git a/compiler-rt/test/dfsan/custom.cc b/compiler-rt/test/dfsan/custom.cc index bdd7cf5117f8..057b0608e038 100644 --- a/compiler-rt/test/dfsan/custom.cc +++ b/compiler-rt/test/dfsan/custom.cc @@ -870,6 +870,11 @@ void test_sprintf() { test_sprintf_chunk("z", "%c", 'z'); // %n, %s, %d, %f, and %% already tested + + // Test formatting with width passed as an argument. + r = sprintf(buf, "hi %*d my %*s friend %.*f", 3, 1, 6, "dear", 4, 3.14159265359); + assert(r == 30); + assert(strcmp(buf, "hi 1 my dear friend 3.1416") == 0); } void test_snprintf() {