DataFlowSanitizer; compiler-rt changes.

DataFlowSanitizer is a generalised dynamic data flow analysis.

Unlike other Sanitizer tools, this tool is not designed to detect a
specific class of bugs on its own.  Instead, it provides a generic
dynamic data flow analysis framework to be used by clients to help
detect application-specific issues within their own code.

Differential Revision: http://llvm-reviews.chandlerc.com/D967

llvm-svn: 187924
This commit is contained in:
Peter Collingbourne 2013-08-07 22:47:26 +00:00
parent e5d5b0c71e
commit 5cbab07d02
14 changed files with 541 additions and 1 deletions

View File

@ -1,6 +1,7 @@
set(SANITIZER_HEADERS
sanitizer/asan_interface.h
sanitizer/common_interface_defs.h
sanitizer/dfsan_interface.h
sanitizer/linux_syscall_hooks.h
sanitizer/lsan_interface.h
sanitizer/msan_interface.h)

View File

@ -0,0 +1,80 @@
//===-- dfsan_interface.h -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of DataFlowSanitizer.
//
// Public interface header.
//===----------------------------------------------------------------------===//
#ifndef DFSAN_INTERFACE_H
#define DFSAN_INTERFACE_H
#include <stddef.h>
#include <stdint.h>
#include <sanitizer/common_interface_defs.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef uint16_t dfsan_label;
/// Stores information associated with a specific label identifier. A label
/// may be a base label created using dfsan_create_label, with associated
/// text description and user data, or an automatically created union label,
/// which represents the union of two label identifiers (which may themselves
/// be base or union labels).
struct dfsan_label_info {
// Fields for union labels, set to 0 for base labels.
// l1 and l2 are the two labels whose union this label represents.
dfsan_label l1;
dfsan_label l2;
// Fields for base labels.
// desc and userdata are the values that were passed to dfsan_create_label.
const char *desc;
void *userdata;
};
/// Creates and returns a base label with the given description and user data.
dfsan_label dfsan_create_label(const char *desc, void *userdata);
/// Sets the label for each address in [addr,addr+size) to \c label.
void dfsan_set_label(dfsan_label label, void *addr, size_t size);
/// Sets the label for each address in [addr,addr+size) to the union of the
/// current label for that address and \c label.
void dfsan_add_label(dfsan_label label, void *addr, size_t size);
/// Retrieves the label associated with the given data.
///
/// The type of 'data' is arbitrary. The function accepts a value of any type,
/// which can be truncated or extended (implicitly or explicitly) as necessary.
/// The truncation/extension operations will preserve the label of the original
/// value.
dfsan_label dfsan_get_label(long data);
/// Retrieves a pointer to the dfsan_label_info struct for the given label.
const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
/// Returns whether the label \c label contains the label \c elem, i.e. whether
/// \c elem is \c label itself or one of the labels that \c label is
/// (transitively) a union of. The result is nonzero if so, and 0 otherwise.
int dfsan_has_label(dfsan_label label, dfsan_label elem);
/// If the label \c label contains a label with the description \c desc,
/// returns that label, else returns 0.
dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc);
#ifdef __cplusplus
} // extern "C"
/// C++ convenience overload: applies \c label to every byte of the object
/// \c data by forwarding its address and size to the C entry point.
template <typename T>
void dfsan_set_label(dfsan_label label, T &data) {
  void *addr = (void *)&data;
  dfsan_set_label(label, addr, sizeof(data));
}
#endif
#endif // DFSAN_INTERFACE_H

View File

@ -17,6 +17,7 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT ANDROID)
add_subdirectory(tsan)
add_subdirectory(msan)
add_subdirectory(msandr)
add_subdirectory(dfsan)
endif()
# The top-level lib directory contains a large amount of C code which provides

View File

@ -22,6 +22,7 @@ SubDirs += tsan
SubDirs += msan
SubDirs += ubsan
SubDirs += lsan
SubDirs += dfsan
# Define the variables for this specific directory.
Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))

View File

@ -0,0 +1,29 @@
include_directories(..)

# Sources and compile flags shared by both flavours of the DFSan runtime.
set(DFSAN_RTL_SOURCES dfsan.cc)
set(DFSAN_RTL_CFLAGS
  ${SANITIZER_COMMON_CFLAGS}
  # -ffreestanding prevents clang from generating libc calls.
  -ffreestanding)

# Static runtime libraries. Only x86_64 is built.
set(DFSAN_RUNTIME_LIBRARIES)
set(arch "x86_64")
if(CAN_TARGET_${arch})
  # Regular runtime: bundles the interception and sanitizer_common objects
  # (including the libc-dependent part) and is compiled with -fPIE.
  add_compiler_rt_static_runtime(clang_rt.dfsan-${arch} ${arch}
    SOURCES
      ${DFSAN_RTL_SOURCES}
      $<TARGET_OBJECTS:RTInterception.${arch}>
      $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
      $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
    CFLAGS ${DFSAN_RTL_CFLAGS} -fPIE)

  # "libc" flavour: compiled with -DDFSAN_NOLIBC and -fPIC, and bundles only
  # the libc-independent sanitizer_common objects.
  add_compiler_rt_static_runtime(clang_rt.dfsan-libc-${arch} ${arch}
    SOURCES
      ${DFSAN_RTL_SOURCES}
      $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
    CFLAGS ${DFSAN_RTL_CFLAGS} -fPIC -DDFSAN_NOLIBC)

  # Only the regular runtime is recorded in DFSAN_RUNTIME_LIBRARIES.
  list(APPEND DFSAN_RUNTIME_LIBRARIES clang_rt.dfsan-${arch})
endif()

add_subdirectory(lit_tests)

View File

@ -0,0 +1,23 @@
#===- lib/dfsan/Makefile.mk --------------------------------*- Makefile -*--===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
ModuleName := dfsan
SubDirs :=

# Every .cc file in this directory is a source of the module; each one
# produces an object file of the same base name.
Sources := $(notdir $(wildcard $(Dir)/*.cc))
ObjNames := $(patsubst %.cc,%.o,$(Sources))
Implementation := Generic

# FIXME: use automatic dependencies?
# Until then, depend on every header here and in sanitizer_common.
Dependencies := $(wildcard $(Dir)/*.h)
Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)

# Define a convenience variable for all the dfsan functions: the source file
# names with the .cc suffix removed.
DfsanFunctions := $(patsubst %.cc,%,$(Sources))

View File

@ -0,0 +1,221 @@
//===-- dfsan.cc ----------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of DataFlowSanitizer.
//
// DataFlowSanitizer runtime. This file defines the public interface to
// DataFlowSanitizer as well as the definition of certain runtime functions
// called automatically by the compiler (specifically the instrumentation pass
// in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
//
// The public interface is defined in include/sanitizer/dfsan_interface.h whose
// functions are prefixed dfsan_ while the compiler interface functions are
// prefixed __dfsan_.
//===----------------------------------------------------------------------===//
#include "sanitizer/dfsan_interface.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
// Atomic counterpart of dfsan_label, used for the label counter and for union
// table entries.
typedef atomic_uint16_t atomic_dfsan_label;
// Sentinel stored in a union table entry while that entry is being
// initialized (see __dfsan_union). Label allocation CHECKs that this value is
// never handed out as a real label.
static const dfsan_label kInitializingLabel = -1;
// Number of distinct values a dfsan_label can take.
static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
// The most recently allocated label. New labels are obtained by atomically
// incrementing this counter.
static atomic_dfsan_label __dfsan_last_label;
// Per-label metadata, indexed by label value.
static dfsan_label_info __dfsan_label_info[kNumLabels];
// Thread-local slot holding the label of a function's return value. The
// public runtime functions clear it so that their results are unlabelled in
// the caller.
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
// Thread-local slots holding the labels of call arguments. The
// instrumentation pass stores the label of the first argument in element 0
// (see dfsan_get_label).
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];
// On Linux/x86_64, memory is laid out as follows:
//
// +--------------------+ 0x800000000000 (top of memory)
// | application memory |
// +--------------------+ 0x700000008000 (kAppAddr)
// | |
// | unused |
// | |
// +--------------------+ 0x200200000000 (kUnusedAddr)
// | union table |
// +--------------------+ 0x200000000000 (kUnionTableAddr)
// | shadow memory |
// +--------------------+ 0x000000010000 (kShadowAddr)
// | reserved by kernel |
// +--------------------+ 0x000000000000
//
// To derive a shadow memory address from an application memory address,
// bits 44-46 are cleared to bring the address into the range
// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
// account for the double byte representation of shadow labels and move the
// address into the shadow memory range. See the function shadow_for below.
// Type of the union table: one atomic label slot for every pair of labels.
typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
// Start of shadow memory.
static const uptr kShadowAddr = 0x10000;
// Start of the union table; shadow memory ends here.
static const uptr kUnionTableAddr = 0x200000000000;
// First address past the union table, i.e. the start of the unused region.
static const uptr kUnusedAddr = kUnionTableAddr + sizeof(dfsan_union_table_t);
// Start of application memory; the unused region ends here.
static const uptr kAppAddr = 0x700000008000;
// Returns the address of the union table slot for the label pair (l1, l2).
// The table lives at the fixed address kUnionTableAddr.
static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
  dfsan_union_table_t *table = (dfsan_union_table_t *) kUnionTableAddr;
  return &(*table)[l1][l2];
}
// Maps an application address to the address of its shadow label: the bits
// selected by 0x700000000000 (bits 44-46) are cleared, and the result is
// shifted left by one because every application byte has a two-byte label.
static dfsan_label *shadow_for(void *ptr) {
  uintptr_t app_addr = (uintptr_t) ptr;
  uintptr_t shadow_addr = (app_addr & ~0x700000000000) << 1;
  return (dfsan_label *) shadow_addr;
}
// Resolves the union of two unequal labels. Nonequality is a precondition for
// this function (the instrumentation pass inlines the equality test).
//
// Returns the other label unchanged when one of the two is 0. Otherwise
// returns the label cached in the union table for the pair, creating that
// entry first if this is the first request for it.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
DCHECK_NE(l1, l2);
if (l1 == 0)
return l2;
if (l2 == 0)
return l1;
// Order the pair so that (a, b) and (b, a) share a single table entry.
if (l1 > l2)
Swap(l1, l2);
atomic_dfsan_label *table_ent = union_table(l1, l2);
// We need to deal with the case where two threads concurrently request
// a union of the same pair of labels. If the table entry is uninitialized,
// (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
// (i.e. -1) to mark that we are initializing it.
dfsan_label label = 0;
if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
memory_order_acquire)) {
// Check whether l2 subsumes l1. We don't need to check whether l1
// subsumes l2 because we are guaranteed here that l1 < l2, and (at least
// in the cases we are interested in) a label may only subsume labels
// created earlier (i.e. with a lower numerical value).
if (__dfsan_label_info[l2].l1 == l1 ||
__dfsan_label_info[l2].l2 == l1) {
label = l2;
} else {
// Allocate a fresh label and record which two labels it is the union of.
label =
atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
CHECK_NE(label, kInitializingLabel);
__dfsan_label_info[label].l1 = l1;
__dfsan_label_info[label].l2 = l2;
}
// Publish the result, replacing the kInitializingLabel marker.
atomic_store(table_ent, label, memory_order_release);
} else if (label == kInitializingLabel) {
// Another thread is initializing the entry. Wait until it is finished.
do {
internal_sched_yield();
label = atomic_load(table_ent, memory_order_acquire);
} while (label == kInitializingLabel);
}
// In the remaining case the entry was already initialized; the code relies
// on the failed compare-exchange having loaded its value into label.
return label;
}
// Returns the union of the n labels stored at ls[0], ..., ls[n-1].
//
// ls: pointer to the first label to combine.
// n:  number of labels to combine. When n is 0 the result is 0, the value
//     used throughout this file for "unlabelled".
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label __dfsan_union_load(dfsan_label *ls, size_t n) {
  // An empty range carries no label. Without this guard ls[0] would be read
  // even though it is not part of the range, and a loop counting from 1
  // towards 0 would run far past the end of the array.
  if (n == 0)
    return 0;
  dfsan_label label = ls[0];
  for (size_t i = 1; i < n; ++i) {
    dfsan_label next_label = ls[i];
    // __dfsan_union requires its operands to differ, so equal neighbours are
    // skipped here.
    if (label != next_label)
      label = __dfsan_union(label, next_label);
  }
  return label;
}
// Copies n bytes from src to dest together with their shadow labels, and
// returns whatever internal_memcpy returns for the application-byte copy.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
void *__dfsan_memcpy(void *dest, const void *src, size_t n) {
  dfsan_label *dest_shadow = shadow_for(dest);
  dfsan_label *src_shadow = shadow_for((void *)src);
  // There is one label per application byte, so n labels are copied.
  internal_memcpy((void *)dest_shadow, (void *)src_shadow,
                  n * sizeof(dfsan_label));
  return internal_memcpy(dest, src, n);
}
// Allocates the next label value and records it as a base label carrying the
// given description and user data. Returns the new label.
SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label dfsan_create_label(const char *desc, void *userdata) {
  dfsan_label label =
      atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
  // kInitializingLabel is reserved as a marker and must never be handed out.
  CHECK_NE(label, kInitializingLabel);
  dfsan_label_info &info = __dfsan_label_info[label];
  // A base label is not the union of anything.
  info.l2 = 0;
  info.l1 = 0;
  info.desc = desc;
  info.userdata = userdata;
  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  return label;
}
// Overwrites the shadow label of each of the size bytes starting at addr with
// label.
SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_label(dfsan_label label, void *addr, size_t size) {
  dfsan_label *shadow = shadow_for(addr);
  for (size_t i = 0; i != size; ++i)
    shadow[i] = label;
}
// Replaces the shadow label of each of the size bytes starting at addr with
// the union of its current label and label.
SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_add_label(dfsan_label label, void *addr, size_t size) {
  dfsan_label *shadow = shadow_for(addr);
  for (size_t i = 0; i != size; ++i) {
    // __dfsan_union requires unequal operands; an equal label needs no change.
    if (shadow[i] == label)
      continue;
    shadow[i] = __dfsan_union(shadow[i], label);
  }
}
// Returns the label of the value passed as 'data'. The value itself is not
// inspected: the instrumentation pass implicitly passes its label in the
// first element of __dfsan_arg_tls.
SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_get_label(long data) {
  dfsan_label data_label = __dfsan_arg_tls[0];
  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  return data_label;
}
// Returns a pointer to the metadata record of the given label.
SANITIZER_INTERFACE_ATTRIBUTE
const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
  const struct dfsan_label_info *info = __dfsan_label_info + label;
  __dfsan_retval_tls = 0;  // Ensures return value is unlabelled in the caller.
  return info;
}
int dfsan_has_label(dfsan_label label, dfsan_label elem) {
__dfsan_retval_tls = 0; // Ensures return value is unlabelled in the caller.
if (label == elem)
return true;
const dfsan_label_info *info = dfsan_get_label_info(label);
if (info->l1 != 0) {
return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
} else {
return false;
}
}
dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
__dfsan_retval_tls = 0; // Ensures return value is unlabelled in the caller.
const dfsan_label_info *info = dfsan_get_label_info(label);
if (info->l1 != 0) {
return dfsan_has_label_with_desc(info->l1, desc) ||
dfsan_has_label_with_desc(info->l2, desc);
} else {
return internal_strcmp(desc, info->desc) == 0;
}
}
#ifdef DFSAN_NOLIBC
extern "C" void dfsan_init() {
#else
static void dfsan_init(int argc, char **argv, char **envp) {
#endif
MmapFixedNoReserve(kShadowAddr, kUnusedAddr - kShadowAddr);
// Protect the region of memory we don't use, to preserve the one-to-one
// mapping from application to shadow memory. But if ASLR is disabled, Linux
// will load our executable in the middle of our unused region. This mostly
// works so long as the program doesn't use too much memory. We support this
// case by disabling memory protection when ASLR is disabled.
uptr init_addr = (uptr)&dfsan_init;
if (!(init_addr >= kUnusedAddr && init_addr < kAppAddr))
Mprotect(kUnusedAddr, kAppAddr - kUnusedAddr);
}
#ifndef DFSAN_NOLIBC
// Places a pointer to dfsan_init in the .preinit_array section; 'used' keeps
// the otherwise unreferenced variable from being discarded.
// NOTE(review): presumably this makes the loader call dfsan_init at startup
// before other initializers run -- confirm against the ELF/loader docs. In
// the DFSAN_NOLIBC build no such pointer is emitted, so dfsan_init has to be
// called by other means.
__attribute__((section(".preinit_array"), used))
static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init;
#endif

View File

@ -0,0 +1,23 @@
# The dfsan source and build directories are the parents of this lit_tests
# directory.
set(DFSAN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
set(DFSAN_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/..)

# Generate lit.site.cfg in the build tree from its template.
configure_lit_site_cfg(
  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)

# Run DFSan tests only if we're sure we may produce working binaries.
if(COMPILER_RT_CAN_EXECUTE_TESTS)
  set(DFSAN_TEST_DEPS
    ${SANITIZER_COMMON_LIT_TEST_DEPS}
    ${DFSAN_RUNTIME_LIBRARIES})
  # Tells lit.cfg where the generated site configuration lives.
  set(DFSAN_TEST_PARAMS
    dfsan_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)

  add_lit_testsuite(check-dfsan "Running the DataFlowSanitizer tests"
    ${CMAKE_CURRENT_BINARY_DIR}
    PARAMS ${DFSAN_TEST_PARAMS}
    DEPENDS ${DFSAN_TEST_DEPS})
  set_target_properties(check-dfsan PROPERTIES FOLDER "DFSan tests")
endif()

View File

@ -0,0 +1,17 @@
// RUN: %clang_dfsan -m64 %s -o %t && %t
// Tests that labels are propagated through loads and stores.
#include <sanitizer/dfsan_interface.h>
#include <assert.h>
int main(void) {
int i = 1;
// Create a fresh base label and attach it to every byte of i.
dfsan_label i_label = dfsan_create_label("i", 0);
dfsan_set_label(i_label, &i, sizeof(i));
// Loading i in order to pass it as an argument must carry the label along,
// so the label reported for the argument is the one stored for i.
dfsan_label new_label = dfsan_get_label(i);
assert(i_label == new_label);
return 0;
}

View File

@ -0,0 +1,25 @@
// RUN: %clang_dfsan -m64 %s -o %t && %t
// Tests that labels are propagated through function calls.
#include <sanitizer/dfsan_interface.h>
#include <assert.h>
// Returns x plus a local value that carries its own freshly created label
// "j", so the result is expected to carry the labels of both operands.
int f(int x) {
int j = 2;
dfsan_label j_label = dfsan_create_label("j", 0);
dfsan_set_label(j_label, &j, sizeof(j));
return x + j;
}
int main(void) {
int i = 1;
dfsan_label i_label = dfsan_create_label("i", 0);
dfsan_set_label(i_label, &i, sizeof(i));
// The label of f's return value must include the label of the argument
// passed in ...
dfsan_label ij_label = dfsan_get_label(f(i));
assert(dfsan_has_label(ij_label, i_label));
// ... and the label created inside f, which is only reachable from here by
// its description.
assert(dfsan_has_label_with_desc(ij_label, "j"));
return 0;
}

View File

@ -0,0 +1,66 @@
# -*- Python -*-
import os
def get_required_attr(config, attr_name):
  # Looks up attr_name on config. A missing or falsy attribute is reported
  # through lit.fatal; the looked-up value is returned in either case.
  attr_value = getattr(config, attr_name, None)
  if attr_value:
    return attr_value
  lit.fatal("No attribute %r in test configuration! You may need to run "
            "tests from your build directory or add this attribute "
            "to lit.site.cfg " % attr_name)
  return attr_value
# Setup config name.
config.name = 'DataFlowSanitizer'
# Setup source root.
config.test_source_root = os.path.dirname(__file__)
def DisplayNoConfigMessage():
  # Reports, via lit.fatal, that no site-specific configuration was found.
  message = ("No site specific configuration available! " +
             "Try running your test from the build tree or running " +
             "make check-dfsan")
  lit.fatal(message)
# Figure out LLVM source root.
llvm_src_root = getattr(config, 'llvm_src_root', None)
if llvm_src_root is None:
  # We probably haven't loaded the site-specific configuration: the user
  # is likely trying to run a test file directly, and the site configuration
  # wasn't created by the build system.
  dfsan_site_cfg = lit.params.get('dfsan_site_config', None)
  if (dfsan_site_cfg) and (os.path.exists(dfsan_site_cfg)):
    lit.load_config(config, dfsan_site_cfg)
    raise SystemExit

  # Try to guess the location of site-specific configuration using llvm-config
  # util that can point where the build tree is.
  llvm_config = lit.util.which("llvm-config", config.environment["PATH"])
  if not llvm_config:
    DisplayNoConfigMessage()

  # Find out the presumed location of generated site config. Run the binary
  # that was located above: it was found through the test environment's PATH,
  # which is not necessarily the PATH a bare "llvm-config" would be resolved
  # against.
  llvm_obj_root = lit.util.capture([llvm_config, "--obj-root"]).strip()
  dfsan_site_cfg = os.path.join(llvm_obj_root, "projects", "compiler-rt",
                                "lib", "dfsan", "lit_tests", "lit.site.cfg")
  if (not dfsan_site_cfg) or (not os.path.exists(dfsan_site_cfg)):
    DisplayNoConfigMessage()

  # The site config loads this file again once its settings are in place, so
  # stop processing this (incomplete) pass.
  lit.load_config(config, dfsan_site_cfg)
  raise SystemExit
# Setup default compiler flags used with -fsanitize=dataflow option.
clang_dfsan_cflags = ["-fsanitize=dataflow"]
clang_dfsan_cxxflags = ["-ccc-cxx "] + clang_dfsan_cflags

# Register the %clang_dfsan and %clangxx_dfsan substitutions: each expands to
# the clang binary followed by the matching flag list and a trailing space.
for pattern, flags in (("%clang_dfsan ", clang_dfsan_cflags),
                       ("%clangxx_dfsan ", clang_dfsan_cxxflags)):
  command = " ".join([config.clang] + flags) + " "
  config.substitutions.append( (pattern, command) )

# Default test suffixes.
config.suffixes = ['.c', '.cc', '.cpp']

# DataFlowSanitizer tests are currently supported on Linux only.
if config.host_os != 'Linux':
  config.unsupported = True

View File

@ -0,0 +1,17 @@
config.target_triple = "@TARGET_TRIPLE@"
config.host_os = "@HOST_OS@"
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
config.clang = "@LLVM_BINARY_DIR@/bin/clang"
# LLVM tools dir can be passed in lit parameters, so try to
# apply substitution.
try:
  config.llvm_tools_dir = config.llvm_tools_dir % lit.params
except KeyError as e:
  # "except KeyError as e" is accepted by Python 2.6+ and Python 3; the older
  # "except KeyError,e" spelling is a syntax error under Python 3.
  key, = e.args
  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
# Let the main config do the real work.
lit.load_config(config, "@DFSAN_SOURCE_DIR@/lit_tests/lit.cfg")

View File

@ -0,0 +1,33 @@
// RUN: %clang_dfsan -m64 %s -o %t && %t
// Tests that labels are propagated through computation and that union labels
// are properly created.
#include <sanitizer/dfsan_interface.h>
#include <assert.h>
int main(void) {
// Three independent values, each tagged with its own base label.
int i = 1;
dfsan_label i_label = dfsan_create_label("i", 0);
dfsan_set_label(i_label, &i, sizeof(i));
int j = 2;
dfsan_label j_label = dfsan_create_label("j", 0);
dfsan_set_label(j_label, &j, sizeof(j));
int k = 3;
dfsan_label k_label = dfsan_create_label("k", 0);
dfsan_set_label(k_label, &k, sizeof(k));
// i + j must carry the labels of exactly its two operands.
dfsan_label ij_label = dfsan_get_label(i + j);
assert(dfsan_has_label(ij_label, i_label));
assert(dfsan_has_label(ij_label, j_label));
assert(!dfsan_has_label(ij_label, k_label));
// i + j + k must carry all three labels.
dfsan_label ijk_label = dfsan_get_label(i + j + k);
assert(dfsan_has_label(ijk_label, i_label));
assert(dfsan_has_label(ijk_label, j_label));
assert(dfsan_has_label(ijk_label, k_label));
return 0;
}

View File

@ -61,7 +61,7 @@ endif
# Build runtime libraries for x86_64.
ifeq ($(call contains,$(SupportedArches),x86_64),true)
Configs += full-x86_64 profile-x86_64 san-x86_64 asan-x86_64 tsan-x86_64 \
msan-x86_64 ubsan-x86_64 ubsan_cxx-x86_64
msan-x86_64 ubsan-x86_64 ubsan_cxx-x86_64 dfsan-x86_64
Arch.full-x86_64 := x86_64
Arch.profile-x86_64 := x86_64
Arch.san-x86_64 := x86_64
@ -70,6 +70,7 @@ Arch.tsan-x86_64 := x86_64
Arch.msan-x86_64 := x86_64
Arch.ubsan-x86_64 := x86_64
Arch.ubsan_cxx-x86_64 := x86_64
Arch.dfsan-x86_64 := x86_64
endif
ifneq ($(LLVM_ANDROID_TOOLCHAIN_DIR),)
@ -101,6 +102,7 @@ CFLAGS.ubsan-i386 := $(CFLAGS) -m32 $(SANITIZER_CFLAGS) -fno-rtti
CFLAGS.ubsan-x86_64 := $(CFLAGS) -m64 $(SANITIZER_CFLAGS) -fno-rtti
CFLAGS.ubsan_cxx-i386 := $(CFLAGS) -m32 $(SANITIZER_CFLAGS)
CFLAGS.ubsan_cxx-x86_64 := $(CFLAGS) -m64 $(SANITIZER_CFLAGS)
CFLAGS.dfsan-x86_64 := $(CFLAGS) -m64 $(SANITIZER_CFLAGS)
SHARED_LIBRARY.asan-arm-android := 1
ANDROID_COMMON_FLAGS := -target arm-linux-androideabi \
@ -137,6 +139,7 @@ FUNCTIONS.ubsan-i386 := $(UbsanFunctions)
FUNCTIONS.ubsan-x86_64 := $(UbsanFunctions)
FUNCTIONS.ubsan_cxx-i386 := $(UbsanCXXFunctions)
FUNCTIONS.ubsan_cxx-x86_64 := $(UbsanCXXFunctions)
FUNCTIONS.dfsan-x86_64 := $(DfsanFunctions) $(SanitizerCommonFunctions)
# Always use optimized variants.
OPTIMIZED := 1