[dfsan] Track origin at loads

The first version of origin tracking tracks only memory stores. Although
    this is sufficient for understanding correct flows, it is hard to figure
    out where an undefined value is read from. To find where an undefined value
    is read, we still have to do a reverse binary search from the last store in
    the chain, adding printing and logging along the possible code paths. This
    is quite inefficient.

    Tracking memory load instructions can help in this case. The main issues
    with tracking loads are the performance and code size overheads.

    When tracking only stores, the code size overhead is 38%, the memory
    overhead is 1x, and the CPU overhead is 3x. In practice the number of
    loads is much larger than the number of stores, so both the code size and
    the CPU overhead increase when loads are tracked as well. The first
    blocker is the code size overhead: linking fails if we inline the
    load-tracking code. The workaround is to use external function calls to
    propagate metadata; this is also the workaround ASan uses. With load
    tracking, the CPU overhead is ~10x. This is a trade-off between
    debuggability and performance, and the mode is meant to be used only when
    debugging cases for which tracking only stores is not enough.

Reviewed By: gbalats

Differential Revision: https://reviews.llvm.org/D100967
This commit is contained in:
Jianzhou Zhao 2021-04-21 04:54:29 +00:00
parent 5dfbcc5ae9
commit 7fdf270965
5 changed files with 153 additions and 13 deletions

View File

@ -559,14 +559,26 @@ static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,
}
}
// Shared body for the origin-chaining interface functions.
//
// Expands to a statement sequence that invokes GET_CALLER_PC_BP_SP, discards
// sp via a void cast, invokes GET_STORE_STACK_TRACE_PC_BP(pc, bp), and finally
// returns ChainOrigin(id, &stack). Because the expansion ends in a return
// statement (including its semicolon), it must be the last thing in a function
// that returns the chained origin, and call sites write it without a trailing
// semicolon.
//
// NOTE(review): pc, bp, sp and stack are not declared here, so they are
// presumably introduced by the two GET_* macros; this is presumably also why
// this is a macro rather than a helper function (the caller PC/BP would
// otherwise be captured one frame deeper) -- confirm against the macro
// definitions.
#define RET_CHAIN_ORIGIN(id) \
GET_CALLER_PC_BP_SP; \
(void)sp; \
GET_STORE_STACK_TRACE_PC_BP(pc, bp); \
return ChainOrigin(id, &stack);
// Return a new origin chain with the previous ID id and the current stack
// trace.
//
// The whole body is RET_CHAIN_ORIGIN(id), which captures the stack trace and
// returns ChainOrigin(id, &stack). The statements must not also be spelled out
// by hand before the macro: that would leave the macro invocation unreachable
// behind a return and expand the same local declarations twice in one scope.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsan_chain_origin(dfsan_origin id) {
  RET_CHAIN_ORIGIN(id)
}
// Conditional variant of origin chaining: when label is zero (untainted) the
// incoming origin id is handed back untouched; otherwise a new chain entry is
// created from id and the current stack trace.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsan_chain_origin_if_tainted(dfsan_label label, dfsan_origin id) {
  if (label == 0) {
    // Nothing is tainted, so there is no flow worth recording.
    return id;
  }
  RET_CHAIN_ORIGIN(id)
}
// Copy or move the origins of the len bytes from src to dst.

View File

@ -0,0 +1,31 @@
// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=2 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
// RUN: %run %t > %t.out 2>&1
// RUN: FileCheck %s < %t.out
//
// REQUIRES: x86_64-target-arch
#include <sanitizer/dfsan_interface.h>
// foo is marked noinline; the expected trace below contains a frame in
// dfs$foo. The expectations locate source lines with [[@LINE-N]] offsets, so
// do not add or remove lines between here and them without updating the offsets.
__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; }
int main(int argc, char *argv[]) {
uint64_t a = 10;
uint64_t b = 20;
dfsan_set_label(8, &a, sizeof(a)); // only a is labeled (label 8); b is not
uint64_t c = foo(a, b); // c is computed from the labeled a
dfsan_print_origin_trace(&c, NULL); // prints the origin chain recorded for c
}
// CHECK: Taint value 0x8 {{.*}} origin tracking ()
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-6]]
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in dfs$foo {{.*}}origin_track_ld.c:[[@LINE-15]]
// CHECK: #1 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-10]]
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-13]]
// CHECK: Origin value: {{.*}}, Taint value was created at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-17]]

View File

@ -256,7 +256,8 @@ static cl::opt<int> ClInstrumentWithCallThreshold(
// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
cl::desc("Track origins of labels"),
cl::Hidden, cl::init(0));
@ -453,6 +454,7 @@ class DataFlowSanitizer {
FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy;
FunctionType *DFSanChainOriginFnTy;
FunctionType *DFSanChainOriginIfTaintedFnTy;
FunctionType *DFSanMemOriginTransferFnTy;
FunctionType *DFSanMaybeStoreOriginFnTy;
FunctionCallee DFSanUnionFn;
@ -469,6 +471,7 @@ class DataFlowSanitizer {
FunctionCallee DFSanMemTransferCallbackFn;
FunctionCallee DFSanCmpCallbackFn;
FunctionCallee DFSanChainOriginFn;
FunctionCallee DFSanChainOriginIfTaintedFn;
FunctionCallee DFSanMemOriginTransferFn;
FunctionCallee DFSanMaybeStoreOriginFn;
SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
@ -637,9 +640,18 @@ struct DFSanFunction {
Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
Instruction *Pos);
Value *combineOperandShadows(Instruction *Inst);
std::pair<Value *, Value *> loadShadowOrigin(Value *ShadowAddr, uint64_t Size,
/// Generates IR to load shadow and origin corresponding to bytes [\p
/// Addr, \p Addr + \p Size), where addr has alignment \p
/// InstAlignment, and take the union of each of those shadows. The returned
/// shadow always has primitive type.
///
/// When tracking loads is enabled, the returned origin is a chain at the
/// current stack if the returned shadow is tainted.
std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
Align InstAlignment,
Instruction *Pos);
void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
Align InstAlignment, Value *PrimitiveShadow,
Value *Origin, Instruction *Pos);
@ -695,11 +707,18 @@ private:
/// additional call with many instructions. To ensure common cases are fast,
/// checks if it is possible to load labels and origins without using the
/// callback function.
///
/// When enabling tracking load instructions, we always use
/// __dfsan_load_label_and_origin to reduce code size.
bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
/// Returns a chain at the current stack with previous origin V.
Value *updateOrigin(Value *V, IRBuilder<> &IRB);
/// Returns a chain at the current stack with previous origin V if Shadow is
/// tainted.
Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
/// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
/// Origin otherwise.
Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
@ -722,6 +741,13 @@ private:
bool shouldInstrumentWithCall();
/// Generates IR to load shadow and origin corresponding to bytes [\p
/// Addr, \p Addr + \p Size), where addr has alignment \p
/// InstAlignment, and take the union of each of those shadows. The returned
/// shadow always has primitive type.
std::pair<Value *, Value *>
loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
Align InstAlignment, Instruction *Pos);
int NumOriginStores = 0;
};
@ -1110,6 +1136,9 @@ bool DataFlowSanitizer::init(Module &M) {
/*isVarArg=*/false);
DFSanChainOriginFnTy =
FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
DFSanChainOriginIfTaintedFnTy = FunctionType::get(
OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
Int8Ptr, IntptrTy, OriginTy};
DFSanMaybeStoreOriginFnTy = FunctionType::get(
@ -1343,6 +1372,15 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
DFSanChainOriginFnTy, AL);
}
{
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
Attribute::ZExt);
DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
"__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
}
DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
"__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
@ -1381,6 +1419,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanChainOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
@ -2033,6 +2073,11 @@ Align DFSanFunction::getOriginAlign(Align InstAlignment) {
bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
Align InstAlignment) {
// When enabling tracking load instructions, we always use
// __dfsan_load_label_and_origin to reduce code size.
if (ClTrackOrigins == 2)
return true;
assert(Size != 0);
// * if Size == 1, it is sufficient to load its origin aligned at 4.
// * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
@ -2198,13 +2243,8 @@ Value *DFSanFunction::loadLegacyShadowFast(Value *ShadowAddr, uint64_t Size,
return Shadow;
}
// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
// Addr has alignment Align, and take the union of each of those shadows. The
// returned shadow always has primitive type.
std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
uint64_t Size,
Align InstAlignment,
Instruction *Pos) {
std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
// Non-escaped loads.
@ -2309,6 +2349,24 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
return {FallbackCall, Origin};
}
// Loads the shadow and origin for [Addr, Addr + Size) through
// loadShadowOriginSansLoadTracking and, when origin tracking is on and
// -dfsan-track-origins=2 is selected, replaces the origin with the result of
// updateOriginIfTainted. The shadow is returned unchanged in every case.
std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
                                                            uint64_t Size,
                                                            Align InstAlignment,
                                                            Instruction *Pos) {
  std::pair<Value *, Value *> Loaded =
      loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);

  const bool TrackLoads = DFS.shouldTrackOrigins() && ClTrackOrigins == 2;
  if (!TrackLoads)
    return Loaded;

  // A shadow that is a constant zero needs no chaining call at all, so skip
  // emitting one.
  const auto *ShadowAsConstant = dyn_cast<Constant>(Loaded.first);
  if (ShadowAsConstant && ShadowAsConstant->isZeroValue())
    return Loaded;

  IRBuilder<> IRB(Pos);
  Loaded.second = updateOriginIfTainted(Loaded.first, Loaded.second, IRB);
  return Loaded;
}
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
switch (AO) {
case AtomicOrdering::NotAtomic:
@ -2380,6 +2438,12 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
}
}
// Emits, at IRB's insertion point, a call to the
// DFSanChainOriginIfTaintedFn runtime function with (Shadow, Origin) and
// returns the call's result. Must only be used when origin tracking is
// enabled (asserted).
Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {
  assert(DFS.shouldTrackOrigins());
  Value *ChainArgs[] = {Shadow, Origin};
  return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, ChainArgs);
}
Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
if (!DFS.shouldTrackOrigins())
return V;

View File

@ -55,5 +55,6 @@ define void @store(i8* %p) {
; CHECK: declare void @__dfsan_nonzero_label()
; CHECK: declare void @__dfsan_vararg_wrapper(i8*)
; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext)
; CHECK: declare zeroext i32 @__dfsan_chain_origin_if_tainted(i[[#SBITS]] zeroext, i32 zeroext)
; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64)
; CHECK: declare void @__dfsan_maybe_store_origin(i[[#SBITS]] zeroext, i8*, i64, i32 zeroext)

View File

@ -0,0 +1,32 @@
; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-8-labels -S | FileCheck %s
; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-16-labels -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
; With -dfsan-track-origins=2 the load through %p is expected to be
; instrumented as follows:
;   1. a call to __dfsan_load_label_and_origin whose i64 result carries the
;      label above bit 31 (lshr by 32, then trunc to the shadow width) and the
;      origin in the low 32 bits (trunc to i32);
;   2. a call to __dfsan_chain_origin_if_tainted on that label and origin;
;   3. an or of the loaded label with the shadow of the pointer argument, and
;      a select that takes the pointer argument's origin when its shadow is
;      non-zero and the chained origin otherwise.
; The combined label and the selected origin are then stored to
; __dfsan_retval_tls and __dfsan_retval_origin_tls.
define i64 @load64(i64* %p) {
; CHECK-LABEL: @"dfs$load64"
; CHECK-NEXT: %[[#PO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
; CHECK-NEXT: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]]
; CHECK-NEXT: %[[#INTP:]] = bitcast i64* %p to i8*
; CHECK-NEXT: %[[#LABEL_ORIGIN:]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* %[[#INTP]], i64 8)
; CHECK-NEXT: %[[#LABEL_ORIGIN_H32:]] = lshr i64 %[[#LABEL_ORIGIN]], 32
; CHECK-NEXT: %[[#LABEL:]] = trunc i64 %[[#LABEL_ORIGIN_H32]] to i[[#SBITS]]
; CHECK-NEXT: %[[#ORIGIN:]] = trunc i64 %[[#LABEL_ORIGIN]] to i32
; CHECK-NEXT: %[[#ORIGIN_CHAINED:]] = call i32 @__dfsan_chain_origin_if_tainted(i[[#SBITS]] %[[#LABEL]], i32 %[[#ORIGIN]])
; CHECK-NEXT: %[[#LABEL:]] = or i[[#SBITS]] %[[#LABEL]], %[[#PS]]
; CHECK-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
; CHECK-NEXT: %[[#ORIGIN_SEL:]] = select i1 %[[#NZ]], i32 %[[#PO]], i32 %[[#ORIGIN_CHAINED]]
; CHECK-NEXT: %a = load i64, i64* %p
; CHECK-NEXT: store i[[#SBITS]] %[[#LABEL]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
; CHECK-NEXT: store i32 %[[#ORIGIN_SEL]], i32* @__dfsan_retval_origin_tls, align 4
%a = load i64, i64* %p
ret i64 %a
}