Custom wrappers for DFSanitizing sprintf & snprintf.

Differential Revision: http://reviews.llvm.org/D5561

llvm-svn: 219293
This commit is contained in:
Lorenzo Martignoni 2014-10-08 10:01:42 +00:00
parent 58a2461368
commit 60ebffc12f
6 changed files with 416 additions and 14 deletions

View File

@ -12,12 +12,14 @@
// This file defines the custom functions listed in done_abilist.txt.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_linux.h"
#include "dfsan/dfsan.h"
#include <arpa/inet.h>
#include <assert.h>
#include <ctype.h>
#include <dlfcn.h>
#include <link.h>
@ -26,6 +28,8 @@
#include <pwd.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -839,4 +843,281 @@ __dfsw_write(int fd, const void *buf, size_t count,
*ret_label = 0;
return write(fd, buf, count);
}
// Type used to extract a dfsan_label with va_arg(). Labels passed through
// the variadic part of a wrapper are read back as this type (see the
// va_arg(ap, dfsan_label_va) calls in format_buffer) and then converted to
// dfsan_label.
// NOTE(review): presumably dfsan_label is narrower than int and is therefore
// promoted to int when passed through '...' -- confirm against dfsan.h.
typedef int dfsan_label_va;
// One piece of formatted output: the result of formatting either a run of
// literal characters or exactly one conversion directive of the format string.
struct Chunk {
  const char *ptr;  // Start of this chunk inside the output buffer.
  size_t size;      // Number of output bytes belonging to this chunk.

  // How the output bytes of the chunk get their DFSan label. This is decided
  // by the kind of directive that produced the chunk.
  enum LabelType {
    NONE = 0,  // Literal text: no argument, hence no label to consume.
    IGNORED,   // '%n': an argument (and its label) exists but yields no output.
    STRING,    // '%s': labels are taken from the bytes of the string argument.
    NUMERIC,   // Every other directive: the label of the argument itself.
  } label_type;

  // The string argument itself; only meaningful when label_type == STRING.
  const char *arg;
};
// Formats one chunk of a format string into 'str', starting at offset 'off'.
//
// The chunk is the first 'format_size' bytes of 'format' (it is not required
// to be NUL-terminated there); the variadic arguments are whatever that chunk
// consumes. When 'has_size' is true the output is bounded as by vsnprintf,
// with 'size' being the capacity of the whole buffer 'str'; otherwise the
// output is unbounded, as by vsprintf.
//
// Returns the value returned by the underlying vsnprintf/vsprintf call.
static int format_chunk(char *str, size_t off, bool has_size, size_t size,
                        const char *format, size_t format_size, ...) {
  // The C library needs a NUL-terminated format, so work on a private copy of
  // just this chunk.
  char *directive = (char *) malloc(format_size + 1);
  assert(directive);
  internal_memcpy(directive, format, format_size);
  directive[format_size] = '\0';

  char *dst = str + off;
  int written = 0;

  va_list args;
  va_start(args, format_size);
  if (!has_size) {
    written = vsprintf(dst, directive, args);
  } else {
    // Once the offset has reached the capacity nothing more may be stored,
    // but the call is still made so that its return value reports the length
    // the chunk would have had.
    size_t avail = off < size ? size - off : 0;
    written = vsnprintf(dst, avail, directive, args);
  }
  va_end(args);

  free(directive);
  return written;
}
// Formats the input and propagates the input labels to the output. The output
// is stored in 'str'. If 'has_size' is true, 'size' bounds the number of
// output bytes. 'format' and 'ap' are the format string and the list of
// arguments for formatting. Returns the value vsnprintf would return, or the
// first negative value returned while formatting a chunk.
//
// The function splits the format string into chunks, each representing either
// a constant string or a single format directive (e.g., '%.3f'), and formats
// each chunk independently into the output string. This makes it possible to
// tell which bytes of the output string depend on which argument, and thus to
// propagate labels more precisely.
//
// Layout of 'ap' as consumed below: first the arguments used by the format
// directives, then one label for 'str', one for 'size' (only if 'has_size'),
// one for 'format', then one label per argument consumed by a directive, and
// finally a 'dfsan_label *' through which the label of the return value is
// stored (always 0).
//
// Known limitations:
//  - A '*' width or precision is not recognised as consuming an argument, so
//    the arguments following it are read at the wrong position.
//  - For '%s' with a field width, the labels of the string are copied to the
//    start of the chunk without accounting for padding.
static int format_buffer(char *str, bool has_size, size_t size,
                         const char *format, va_list ap) {
  InternalMmapVector<Chunk> chunks(8);
  size_t off = 0;

  while (*format) {
    chunks.push_back(Chunk());
    Chunk& chunk = chunks.back();
    chunk.ptr = str + off;
    chunk.arg = nullptr;

    int status = 0;

    if (*format != '%') {
      // Ordinary character. Consume all the characters until a '%' or the end
      // of the string.
      size_t format_size = 0;
      for (; *format && *format != '%'; ++format, ++format_size) {}
      status = format_chunk(str, off, has_size, size, format - format_size,
                            format_size);
      chunk.label_type = Chunk::NONE;
    } else {
      // Conversion directive. Consume all the characters until a conversion
      // specifier or the end of the string.
      bool end_format = false;
      // Formats the directive seen so far ('format - format_size' is its '%',
      // 'format' its conversion specifier) with the next argument of type t.
#define FORMAT_CHUNK(t)                                                  \
      format_chunk(str, off, has_size, size, format - format_size,       \
                   format_size + 1, va_arg(ap, t))

      // Note that 'format' is advanced before 'end_format' is tested, so on
      // exit it points just past the conversion specifier.
      for (size_t format_size = 1; *++format && !end_format; ++format_size) {
        switch (*format) {
          case 'd':
          case 'i':
          case 'o':
          case 'u':
          case 'x':
          case 'X':
            // The length modifier (if any) immediately precedes the
            // specifier and determines the type of the argument.
            switch (*(format - 1)) {
              case 'h':
                // Also covers the 'hh' case (since the size of the arg is still
                // an int).
                status = FORMAT_CHUNK(int);
                break;
              case 'l':
                if (format_size >= 2 && *(format - 2) == 'l') {
                  status = FORMAT_CHUNK(long long int);
                } else {
                  status = FORMAT_CHUNK(long int);
                }
                break;
              case 'q':
                status = FORMAT_CHUNK(long long int);
                break;
              case 'j':
                status = FORMAT_CHUNK(intmax_t);
                break;
              case 'z':
                status = FORMAT_CHUNK(size_t);
                break;
              case 't':
                // NOTE(review): 't' designates ptrdiff_t; size_t is read
                // here, which assumes both have the same size.
                status = FORMAT_CHUNK(size_t);
                break;
              default:
                status = FORMAT_CHUNK(int);
            }
            chunk.label_type = Chunk::NUMERIC;
            end_format = true;
            break;

          case 'a':
          case 'A':
          case 'e':
          case 'E':
          case 'f':
          case 'F':
          case 'g':
          case 'G':
            if (*(format - 1) == 'L') {
              status = FORMAT_CHUNK(long double);
            } else {
              status = FORMAT_CHUNK(double);
            }
            chunk.label_type = Chunk::NUMERIC;
            end_format = true;
            break;

          case 'c':
            status = FORMAT_CHUNK(int);
            chunk.label_type = Chunk::NUMERIC;
            end_format = true;
            break;

          case 's':
            // Remember the argument: its shadow is the source of the labels.
            chunk.arg = va_arg(ap, char *);
            status =
                format_chunk(str, off, has_size, size,
                             format - format_size, format_size + 1,
                             chunk.arg);
            chunk.label_type = Chunk::STRING;
            end_format = true;
            break;

          case 'p':
            status = FORMAT_CHUNK(void *);
            chunk.label_type = Chunk::NUMERIC;
            end_format = true;
            break;

          case 'n':
            // Produces no output; stores the number of bytes formatted so far.
            *(va_arg(ap, int *)) = (int)off;
            chunk.label_type = Chunk::IGNORED;
            end_format = true;
            break;

          case '%':
            status = format_chunk(str, off, has_size, size,
                                  format - format_size, format_size + 1);
            chunk.label_type = Chunk::NONE;
            end_format = true;
            break;

          default:
            // Flag, width, precision or length modifier: keep scanning.
            break;
        }
      }
#undef FORMAT_CHUNK
    }

    if (status < 0) {
      return status;
    }

    // Work out how many bytes of this chunk actually landed in the buffer.
    // A return value of {v,}snprintf of 'avail' or more means that the output
    // was truncated: in that case exactly 'avail - 1' characters were stored,
    // followed by the terminating NUL.
    if (has_size) {
      if (off < size) {
        size_t avail = size - off;
        size_t ustatus = (size_t) status;
        chunk.size = ustatus >= avail ? avail - 1 : ustatus;
      } else {
        chunk.size = 0;
      }
    } else {
      chunk.size = status;
    }

    // 'off' always advances by the untruncated length so that the final
    // return value matches what vsnprintf would report.
    off += status;
  }

  // Consume the labels of the output buffer, (optional) size, and format
  // string.
  //
  // TODO(martignlo): Decide how to combine labels (e.g., whether to ignore or
  // not the label of the format string).
  va_arg(ap, dfsan_label_va);
  if (has_size) {
    va_arg(ap, dfsan_label_va);
  }
  va_arg(ap, dfsan_label_va);

  // Label each output chunk according to the label supplied as argument to the
  // function. We need to go through all the chunks and arguments even if the
  // string was only partially printed ({v,}snprintf case).
  for (size_t i = 0; i < chunks.size(); ++i) {
    const Chunk& chunk = chunks[i];

    switch (chunk.label_type) {
      case Chunk::NONE:
        dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
        break;
      case Chunk::IGNORED:
        va_arg(ap, dfsan_label_va);
        dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
        break;
      case Chunk::NUMERIC: {
        dfsan_label label = va_arg(ap, dfsan_label_va);
        dfsan_set_label(label, (void*) chunk.ptr, chunk.size);
        break;
      }
      case Chunk::STRING: {
        // Consume the label of the pointer to the string
        va_arg(ap, dfsan_label_va);
        if (chunk.arg == nullptr) {
          // Nothing to read labels from (some C libraries print a placeholder
          // for a null '%s' argument); never dereference it.
          dfsan_set_label(0, (void*) chunk.ptr, chunk.size);
          break;
        }
        // Copy one label per byte of the argument, but never more than the
        // number of bytes this chunk really occupies in the output: the
        // output may have been cut short by a precision or by the size bound.
        size_t copy_len = 0;
        while (copy_len < chunk.size && chunk.arg[copy_len]) {
          ++copy_len;
        }
        if (copy_len > 0) {
          internal_memcpy(shadow_for((void *) chunk.ptr),
                          shadow_for((void *) chunk.arg),
                          sizeof(dfsan_label) * copy_len);
        }
        break;
      }
    }
  }

  // The return value never carries a label.
  dfsan_label *ret_label_ptr = va_arg(ap, dfsan_label *);
  *ret_label_ptr = 0;

  // Number of bytes written in total.
  return off;
}
// DFSan custom wrapper for sprintf(): formats into 'str' without a size bound
// and labels the output via format_buffer. The variadic part is forwarded
// untouched; format_buffer documents how it is consumed.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfsw_sprintf(char *str, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int written =
      format_buffer(str, /* has_size */ false, /* size */ 0, format, args);
  va_end(args);
  return written;
}
// DFSan custom wrapper for snprintf(): formats into 'str', storing at most
// 'size' bytes, and labels the output via format_buffer. The variadic part is
// forwarded untouched; format_buffer documents how it is consumed.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfsw_snprintf(char *str, size_t size, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int written =
      format_buffer(str, /* has_size */ true, size, format, args);
  va_end(args);
  return written;
}
}

View File

@ -208,9 +208,11 @@ fun:sigemptyset=custom
fun:sigaction=custom
fun:gettimeofday=custom
# sprintf-like
fun:sprintf=custom
fun:snprintf=custom
# TODO: custom
fun:snprintf=discard
fun:vsnprintf=discard
fun:asprintf=discard
fun:qsort=discard

View File

@ -1,7 +1,7 @@
#!/usr/bin/env bash
DFSAN_DIR=$(dirname "$0")/../
DFSAN_CUSTOM_TESTS=${DFSAN_DIR}/../../test/dfsan/custom.c
DFSAN_CUSTOM_TESTS=${DFSAN_DIR}/../../test/dfsan/custom.cc
DFSAN_CUSTOM_WRAPPERS=${DFSAN_DIR}/dfsan_custom.cc
DFSAN_ABI_LIST=${DFSAN_DIR}/done_abilist.txt

View File

@ -29,7 +29,7 @@ TSAN_LIT_TEST_LINT_FILTER=${TSAN_TEST_LINT_FILTER},-whitespace/line_length
MSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
LSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
LSAN_LIT_TEST_LINT_FILTER=${LSAN_RTL_LINT_FILTER},-whitespace/line_length
DFSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/printf,-runtime/references
DFSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/printf,-runtime/references,-readability/function
COMMON_RTL_INC_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/sizeof,-runtime/printf,-readability/fn_size
SANITIZER_INCLUDES_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int
MKTEMP="mktemp -q /tmp/tmp.XXXXXXXXXX"

View File

@ -5,7 +5,6 @@
// Tests custom implementations of various glibc functions.
#define _GNU_SOURCE
#include <sanitizer/dfsan_interface.h>
#include <arpa/inet.h>
@ -18,6 +17,7 @@
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
@ -256,12 +256,12 @@ void test_calloc() {
// With any luck this sequence of calls will cause calloc to return the same
// pointer both times. This is probably the best we can do to test this
// function.
char *crv = calloc(4096, 1);
char *crv = (char *) calloc(4096, 1);
ASSERT_ZERO_LABEL(crv[0]);
dfsan_set_label(i_label, crv, 100);
free(crv);
crv = calloc(4096, 1);
crv = (char *) calloc(4096, 1);
ASSERT_ZERO_LABEL(crv[0]);
free(crv);
}
@ -342,14 +342,14 @@ void test_ctime_r() {
static int write_callback_count = 0;
static int last_fd;
static const void *last_buf;
static const unsigned char *last_buf;
static size_t last_count;
void write_callback(int fd, const void *buf, size_t count) {
write_callback_count++;
last_fd = fd;
last_buf = buf;
last_buf = (const unsigned char*) buf;
last_count = count;
}
@ -694,11 +694,11 @@ void test_memchr() {
dfsan_set_label(i_label, &str1[3], 1);
dfsan_set_label(j_label, &str1[4], 1);
char *crv = memchr(str1, 'r', sizeof(str1));
char *crv = (char *) memchr(str1, 'r', sizeof(str1));
assert(crv == &str1[2]);
ASSERT_ZERO_LABEL(crv);
crv = memchr(str1, '1', sizeof(str1));
crv = (char *) memchr(str1, '1', sizeof(str1));
assert(crv == &str1[3]);
#ifdef STRICT_DATA_DEPENDENCIES
ASSERT_ZERO_LABEL(crv);
@ -706,7 +706,7 @@ void test_memchr() {
ASSERT_LABEL(crv, i_label);
#endif
crv = memchr(str1, 'x', sizeof(str1));
crv = (char *) memchr(str1, 'x', sizeof(str1));
assert(!crv);
#ifdef STRICT_DATA_DEPENDENCIES
ASSERT_ZERO_LABEL(crv);
@ -774,6 +774,124 @@ void test_write() {
close(fd);
}
// Checks formatting and label propagation for a single conversion directive.
//
// 'format' is one directive (e.g. "%.8X"), 'arg' its argument and 'expected'
// the text the directive must produce. The directive is wrapped as
// "foo <directive> bar" so that the test can also verify that the constant
// text around it stays unlabelled. The check is run twice: once with an
// unlabelled argument, once with the argument labelled i_label.
template <class T>
void test_sprintf_chunk(const char* expected, const char* format, T arg) {
  char buf[512];
  memset(buf, 'a', sizeof(buf));

  char padded_expected[512];
  strcpy(padded_expected, "foo ");
  strcat(padded_expected, expected);
  strcat(padded_expected, " bar");

  char padded_format[512];
  strcpy(padded_format, "foo ");
  strcat(padded_format, format);
  strcat(padded_format, " bar");

  const size_t expected_len = strlen(padded_expected);

  // Non labelled arg.
  // The call under test is kept outside assert() so that it is still executed
  // when assertions are compiled out.
  int r = sprintf(buf, padded_format, arg);
  assert(r >= 0 && (size_t) r == expected_len);
  assert(strcmp(buf, padded_expected) == 0);
  ASSERT_READ_LABEL(buf, expected_len, 0);
  memset(buf, 'a', sizeof(buf));

  // Labelled arg.
  dfsan_set_label(i_label, &arg, sizeof(arg));
  r = sprintf(buf, padded_format, arg);
  assert(r >= 0 && (size_t) r == expected_len);
  assert(strcmp(buf, padded_expected) == 0);
  // "foo " (4 bytes) and " bar" (4 bytes) are constant text; everything in
  // between was produced from the labelled argument.
  ASSERT_READ_LABEL(buf, 4, 0);
  ASSERT_READ_LABEL(buf + 4, expected_len - 8, i_label);
  ASSERT_READ_LABEL(buf + (expected_len - 4), 4, 0);
}
// Tests the sprintf wrapper: plain text, a format mixing several directives
// with differently labelled arguments, and then each supported conversion
// specifier on its own through test_sprintf_chunk.
void test_sprintf() {
  char buf[2048];
  memset(buf, 'a', sizeof(buf));

  // Test formatting (no conversion specifier).
  // The call is kept outside assert() so that it is still executed when
  // assertions are compiled out, like the second call below.
  int r = sprintf(buf, "Hello world!");
  assert(r == 12);
  assert(strcmp(buf, "Hello world!") == 0);
  ASSERT_READ_LABEL(buf, sizeof(buf), 0);

  // Test formatting & label propagation (multiple conversion specifiers): %s,
  // %d, %n, %f, and %%.
  const char* s = "world";
  int m = 8;
  int d = 27;
  // Only the bytes "or" of "world" are labelled.
  dfsan_set_label(k_label, (void *) (s + 1), 2);
  dfsan_set_label(i_label, &m, sizeof(m));
  dfsan_set_label(j_label, &d, sizeof(d));
  int n;
  r = sprintf(buf, "hello %s, %-d/%d/%d %f %% %n%d", s, 2014, m, d,
              12345.6781234, &n, 1000);
  assert(r == 42);
  assert(strcmp(buf, "hello world, 2014/8/27 12345.678123 % 1000") == 0);
  ASSERT_READ_LABEL(buf, 7, 0);             // "hello w"
  ASSERT_READ_LABEL(buf + 7, 2, k_label);   // "or"
  ASSERT_READ_LABEL(buf + 9, 9, 0);         // "ld, 2014/"
  ASSERT_READ_LABEL(buf + 18, 1, i_label);  // "8"
  ASSERT_READ_LABEL(buf + 19, 1, 0);        // "/"
  ASSERT_READ_LABEL(buf + 20, 2, j_label);  // "27"
  ASSERT_READ_LABEL(buf + 22, 15, 0);       // " 12345.678123 %"
  ASSERT_LABEL(r, 0);
  // %n reports the number of bytes produced before it.
  assert(n == 38);

  // Test formatting & label propagation (single conversion specifier, with
  // additional length and precision modifiers).
  // NOTE(review): the 64-bit expectations below ("%lx", "%p", "%ju", "%zu",
  // "%tu") assume 64-bit long, pointers, intmax_t and size_t.
  test_sprintf_chunk("-559038737", "%d", 0xdeadbeef);
  test_sprintf_chunk("3735928559", "%u", 0xdeadbeef);
  test_sprintf_chunk("12345", "%i", 12345);
  test_sprintf_chunk("751", "%o", 0751);
  test_sprintf_chunk("babe", "%x", 0xbabe);
  test_sprintf_chunk("0000BABE", "%.8X", 0xbabe);
  test_sprintf_chunk("-17", "%hhd", 0xdeadbeef);
  test_sprintf_chunk("-16657", "%hd", 0xdeadbeef);
  test_sprintf_chunk("deadbeefdeadbeef", "%lx", 0xdeadbeefdeadbeef);
  test_sprintf_chunk("0xdeadbeefdeadbeef", "%p",
                     (void *) 0xdeadbeefdeadbeef);
  test_sprintf_chunk("18446744073709551615", "%ju", (intmax_t) -1);
  test_sprintf_chunk("18446744073709551615", "%zu", (size_t) -1);
  test_sprintf_chunk("18446744073709551615", "%tu", (size_t) -1);

  test_sprintf_chunk("0x1.f9acffa7eb6bfp-4", "%a", 0.123456);
  test_sprintf_chunk("0X1.F9ACFFA7EB6BFP-4", "%A", 0.123456);
  test_sprintf_chunk("0.12346", "%.5f", 0.123456);
  test_sprintf_chunk("0.123456", "%g", 0.123456);
  test_sprintf_chunk("1.234560e-01", "%e", 0.123456);
  test_sprintf_chunk("1.234560E-01", "%E", 0.123456);
  test_sprintf_chunk("0.1234567891234560", "%.16Lf",
                     (long double) 0.123456789123456);

  test_sprintf_chunk("z", "%c", 'z');

  // %n, %s, %d, %f, and %% already tested
}
// Tests the snprintf wrapper on output that does not fit: the format would
// produce 35 bytes but only 19 (18 characters plus the terminating NUL) may
// be stored. Verifies the return value, the stored prefix and the labels of
// the stored bytes.
void test_snprintf() {
  char out[2048];
  memset(out, 'a', sizeof(out));
  dfsan_set_label(0, out, sizeof(out));

  const char* name = "world";
  int year = 2014;
  int month = 8;
  int day = 27;
  // Only the bytes "or" of "world" are labelled.
  dfsan_set_label(k_label, (void *) (name + 1), 2);
  dfsan_set_label(i_label, &year, sizeof(year));
  // The month digit would land at offset 18, which is taken by the
  // terminating NUL, so j_label must not show up in the checks below.
  dfsan_set_label(j_label, &month, sizeof(month));

  int ret = snprintf(out, 19, "hello %s, %-d/%d/%d %f", name, year, month,
                     day, 12345.6781234);

  // snprintf reports the length the complete string would have had, not the
  // number of bytes it actually stored.
  assert(ret == 35);
  // Compares 19 bytes, i.e. including the terminating NUL.
  assert(memcmp(out, "hello world, 2014/", 19) == 0);
  ASSERT_READ_LABEL(out, 7, 0);             // "hello w"
  ASSERT_READ_LABEL(out + 7, 2, k_label);   // "or"
  ASSERT_READ_LABEL(out + 9, 4, 0);         // "ld, "
  ASSERT_READ_LABEL(out + 13, 4, i_label);  // "2014"
  ASSERT_READ_LABEL(out + 17, 2, 0);        // "/" and the NUL
  ASSERT_LABEL(ret, 0);
}
int main(void) {
i_label = dfsan_create_label("i", 0);
j_label = dfsan_create_label("j", 0);
@ -810,7 +928,9 @@ int main(void) {
test_select();
test_sigaction();
test_sigemptyset();
test_snprintf();
test_socketpair();
test_sprintf();
test_stat();
test_strcasecmp();
test_strchr();

View File

@ -31,4 +31,3 @@ config.suffixes = ['.c', '.cc', '.cpp']
if config.host_os not in ['Linux', 'Darwin']:
config.unsupported = True