[dfsan] Track origin at loads

The first version of origin tracking tracks only memory stores. Although
    this is sufficient for understanding correct flows, it is hard to figure
    out where an undefined value is read from. To find where an undefined value
    is read, we still have to do a reverse binary search from the last store in
    the chain, adding printing and logging at possible code paths. This is
    quite inefficient.

    Tracking memory load instructions can help in this case. The main issues
    with tracking loads are performance and code size overheads.

    When tracking only stores, the code size overhead is 38%,
    memory overhead is 1x, and cpu overhead is 3x. In practice #load is much
    larger than #store, so both code size and cpu overhead increase. The
    first blocker is code size overhead: linking fails if we inline the
    tracking of loads. The workaround is to use external function calls to
    propagate metadata. This is also the workaround ASan uses. The cpu overhead
    is ~10x. This is a trade-off between debuggability and performance,
    and it will be used only when debugging cases where tracking only stores
    is not enough.

Reviewed By: gbalats

Differential Revision: https://reviews.llvm.org/D100967
This commit is contained in:
Jianzhou Zhao 2021-04-21 04:54:29 +00:00
parent 5dfbcc5ae9
commit 7fdf270965
5 changed files with 153 additions and 13 deletions

View File

@ -559,14 +559,26 @@ static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,
} }
} }
// Shared body of the origin-chaining entry points (__dfsan_chain_origin and
// __dfsan_chain_origin_if_tainted). The expansion ends with
// `return ChainOrigin(id, &stack);`, so it must be the last statement of the
// function that uses it.
// NOTE(review): pc, bp, sp and stack are not declared in this macro;
// presumably GET_CALLER_PC_BP_SP and GET_STORE_STACK_TRACE_PC_BP introduce
// them -- confirm against their definitions. sp is cast to void because
// nothing else in this expansion uses it.
#define RET_CHAIN_ORIGIN(id) \
GET_CALLER_PC_BP_SP; \
(void)sp; \
GET_STORE_STACK_TRACE_PC_BP(pc, bp); \
return ChainOrigin(id, &stack);
// Return a new origin chain with the previous ID id and the current stack // Return a new origin chain with the previous ID id and the current stack
// trace. // trace.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsan_chain_origin(dfsan_origin id) { __dfsan_chain_origin(dfsan_origin id) {
GET_CALLER_PC_BP_SP; RET_CHAIN_ORIGIN(id)
(void)sp; }
GET_STORE_STACK_TRACE_PC_BP(pc, bp);
return ChainOrigin(id, &stack); // Return a new origin chain with the previous ID id and the current stack
// trace if the label is tainted.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
__dfsan_chain_origin_if_tainted(dfsan_label label, dfsan_origin id) {
if (!label)
return id;
RET_CHAIN_ORIGIN(id)
} }
// Copy or move the origins of the len bytes from src to dst. // Copy or move the origins of the len bytes from src to dst.

View File

@ -0,0 +1,31 @@
// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=2 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
// RUN: %run %t > %t.out 2>&1
// RUN: FileCheck %s < %t.out
//
// REQUIRES: x86_64-target-arch
#include <sanitizer/dfsan_interface.h>
// Returns a + b. noinline keeps foo as its own stack frame: the expected trace
// below lists `dfs$foo` as frame #0 with `main` as frame #1.
__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; }
int main(int argc, char *argv[]) { // Comments are kept on existing lines: the expectations below use relative line numbers.
uint64_t a = 10;
uint64_t b = 20;
dfsan_set_label(8, &a, sizeof(a)); // Label a with 8; the expected trace starts with "Taint value 0x8".
uint64_t c = foo(a, b); // c is derived from the labelled a through foo.
dfsan_print_origin_trace(&c, NULL); // Prints the origin trace of c that the expectations below match.
}
// CHECK: Taint value 0x8 {{.*}} origin tracking ()
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-6]]
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in dfs$foo {{.*}}origin_track_ld.c:[[@LINE-15]]
// CHECK: #1 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-10]]
// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-13]]
// CHECK: Origin value: {{.*}}, Taint value was created at
// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-17]]

View File

@ -256,7 +256,8 @@ static cl::opt<int> ClInstrumentWithCallThreshold(
// Controls how to track origins. // Controls how to track origins.
// * 0: do not track origins. // * 0: do not track origins.
// * 1: track origins at memory store operations. // * 1: track origins at memory store operations.
// * 2: TODO: track origins at memory store operations and callsites. // * 2: track origins at memory load and store operations.
// TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins", static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
cl::desc("Track origins of labels"), cl::desc("Track origins of labels"),
cl::Hidden, cl::init(0)); cl::Hidden, cl::init(0));
@ -453,6 +454,7 @@ class DataFlowSanitizer {
FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy;
FunctionType *DFSanChainOriginFnTy; FunctionType *DFSanChainOriginFnTy;
FunctionType *DFSanChainOriginIfTaintedFnTy;
FunctionType *DFSanMemOriginTransferFnTy; FunctionType *DFSanMemOriginTransferFnTy;
FunctionType *DFSanMaybeStoreOriginFnTy; FunctionType *DFSanMaybeStoreOriginFnTy;
FunctionCallee DFSanUnionFn; FunctionCallee DFSanUnionFn;
@ -469,6 +471,7 @@ class DataFlowSanitizer {
FunctionCallee DFSanMemTransferCallbackFn; FunctionCallee DFSanMemTransferCallbackFn;
FunctionCallee DFSanCmpCallbackFn; FunctionCallee DFSanCmpCallbackFn;
FunctionCallee DFSanChainOriginFn; FunctionCallee DFSanChainOriginFn;
FunctionCallee DFSanChainOriginIfTaintedFn;
FunctionCallee DFSanMemOriginTransferFn; FunctionCallee DFSanMemOriginTransferFn;
FunctionCallee DFSanMaybeStoreOriginFn; FunctionCallee DFSanMaybeStoreOriginFn;
SmallPtrSet<Value *, 16> DFSanRuntimeFunctions; SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
@ -637,9 +640,18 @@ struct DFSanFunction {
Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2, Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
Instruction *Pos); Instruction *Pos);
Value *combineOperandShadows(Instruction *Inst); Value *combineOperandShadows(Instruction *Inst);
std::pair<Value *, Value *> loadShadowOrigin(Value *ShadowAddr, uint64_t Size,
/// Generates IR to load shadow and origin corresponding to bytes [\p
/// Addr, \p Addr + \p Size), where addr has alignment \p
/// InstAlignment, and take the union of each of those shadows. The returned
/// shadow always has primitive type.
///
/// When tracking loads is enabled, the returned origin is a chain at the
/// current stack if the returned shadow is tainted.
std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
Align InstAlignment, Align InstAlignment,
Instruction *Pos); Instruction *Pos);
void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
Align InstAlignment, Value *PrimitiveShadow, Align InstAlignment, Value *PrimitiveShadow,
Value *Origin, Instruction *Pos); Value *Origin, Instruction *Pos);
@ -695,11 +707,18 @@ private:
/// additional call with many instructions. To ensure common cases are fast, /// additional call with many instructions. To ensure common cases are fast,
/// checks if it is possible to load labels and origins without using the /// checks if it is possible to load labels and origins without using the
/// callback function. /// callback function.
///
/// When enabling tracking load instructions, we always use
/// __dfsan_load_label_and_origin to reduce code size.
bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment); bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
/// Returns a chain at the current stack with previous origin V. /// Returns a chain at the current stack with previous origin V.
Value *updateOrigin(Value *V, IRBuilder<> &IRB); Value *updateOrigin(Value *V, IRBuilder<> &IRB);
/// Returns a chain at the current stack with previous origin \p Origin if
/// \p Shadow is tainted. The taint check is not done at instrumentation time:
/// this emits a call to the runtime function __dfsan_chain_origin_if_tainted,
/// which returns \p Origin unchanged when the label is zero.
Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);
/// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
/// Origin otherwise. /// Origin otherwise.
Value *originToIntptr(IRBuilder<> &IRB, Value *Origin); Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
@ -722,6 +741,13 @@ private:
bool shouldInstrumentWithCall(); bool shouldInstrumentWithCall();
/// Generates IR to load shadow and origin corresponding to bytes [\p
/// Addr, \p Addr + \p Size), where \p Addr has alignment \p
/// InstAlignment, and take the union of each of those shadows. The returned
/// shadow always has primitive type.
///
/// loadShadowOrigin calls this helper and then applies the load-tracking
/// origin chaining itself.
std::pair<Value *, Value *>
loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
Align InstAlignment, Instruction *Pos);
int NumOriginStores = 0; int NumOriginStores = 0;
}; };
@ -1110,6 +1136,9 @@ bool DataFlowSanitizer::init(Module &M) {
/*isVarArg=*/false); /*isVarArg=*/false);
DFSanChainOriginFnTy = DFSanChainOriginFnTy =
FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false); FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
DFSanChainOriginIfTaintedFnTy = FunctionType::get(
OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits), Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
Int8Ptr, IntptrTy, OriginTy}; Int8Ptr, IntptrTy, OriginTy};
DFSanMaybeStoreOriginFnTy = FunctionType::get( DFSanMaybeStoreOriginFnTy = FunctionType::get(
@ -1343,6 +1372,15 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin", DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
DFSanChainOriginFnTy, AL); DFSanChainOriginFnTy, AL);
} }
{
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
Attribute::ZExt);
DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
"__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
}
DFSanMemOriginTransferFn = Mod->getOrInsertFunction( DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
"__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy); "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
@ -1381,6 +1419,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert( DFSanRuntimeFunctions.insert(
DFSanChainOriginFn.getCallee()->stripPointerCasts()); DFSanChainOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert( DFSanRuntimeFunctions.insert(
DFSanMemOriginTransferFn.getCallee()->stripPointerCasts()); DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert( DFSanRuntimeFunctions.insert(
@ -2033,6 +2073,11 @@ Align DFSanFunction::getOriginAlign(Align InstAlignment) {
bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size, bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
Align InstAlignment) { Align InstAlignment) {
// When enabling tracking load instructions, we always use
// __dfsan_load_label_and_origin to reduce code size.
if (ClTrackOrigins == 2)
return true;
assert(Size != 0); assert(Size != 0);
// * if Size == 1, it is sufficient to load its origin aligned at 4. // * if Size == 1, it is sufficient to load its origin aligned at 4.
// * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
@ -2198,13 +2243,8 @@ Value *DFSanFunction::loadLegacyShadowFast(Value *ShadowAddr, uint64_t Size,
return Shadow; return Shadow;
} }
// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
// Addr has alignment Align, and take the union of each of those shadows. The Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
// returned shadow always has primitive type.
std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
uint64_t Size,
Align InstAlignment,
Instruction *Pos) {
const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
// Non-escaped loads. // Non-escaped loads.
@ -2309,6 +2349,24 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
return {FallbackCall, Origin}; return {FallbackCall, Origin};
} }
/// Loads the shadow and origin for [\p Addr, \p Addr + \p Size) and, when
/// origin tracking is on and -dfsan-track-origins is 2, replaces the origin
/// with the result of updateOriginIfTainted so the load itself shows up in the
/// origin chain.
std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
                                                            uint64_t Size,
                                                            Align InstAlignment,
                                                            Instruction *Pos) {
  std::pair<Value *, Value *> Loaded =
      loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
  Value *PrimitiveShadow = Loaded.first;
  Value *Origin = Loaded.second;

  // Nothing to add unless loads are being tracked.
  if (!DFS.shouldTrackOrigins() || ClTrackOrigins != 2)
    return {PrimitiveShadow, Origin};

  IRBuilder<> IRB(Pos);
  // A shadow that is a compile-time zero constant cannot be tainted, so the
  // runtime call is only emitted for everything else.
  const auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
  const bool IsKnownZeroShadow =
      ConstantShadow != nullptr && ConstantShadow->isZeroValue();
  if (!IsKnownZeroShadow)
    Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);

  return {PrimitiveShadow, Origin};
}
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) { static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
switch (AO) { switch (AO) {
case AtomicOrdering::NotAtomic: case AtomicOrdering::NotAtomic:
@ -2380,6 +2438,12 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
} }
} }
/// Emits a call to DFS.DFSanChainOriginIfTaintedFn with (\p Shadow, \p Origin)
/// at the insertion point of \p IRB and returns the call's result. Only valid
/// while origin tracking is enabled.
Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {
  assert(DFS.shouldTrackOrigins());
  Value *ChainedOrigin =
      IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
  return ChainedOrigin;
}
Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) { Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
if (!DFS.shouldTrackOrigins()) if (!DFS.shouldTrackOrigins())
return V; return V;

View File

@ -55,5 +55,6 @@ define void @store(i8* %p) {
; CHECK: declare void @__dfsan_nonzero_label() ; CHECK: declare void @__dfsan_nonzero_label()
; CHECK: declare void @__dfsan_vararg_wrapper(i8*) ; CHECK: declare void @__dfsan_vararg_wrapper(i8*)
; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext) ; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext)
; CHECK: declare zeroext i32 @__dfsan_chain_origin_if_tainted(i[[#SBITS]] zeroext, i32 zeroext)
; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64) ; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64)
; CHECK: declare void @__dfsan_maybe_store_origin(i[[#SBITS]] zeroext, i8*, i64, i32 zeroext) ; CHECK: declare void @__dfsan_maybe_store_origin(i[[#SBITS]] zeroext, i8*, i64, i32 zeroext)

View File

@ -0,0 +1,32 @@
; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-8-labels -S | FileCheck %s
; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-16-labels -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
; Exercises a plain 8-byte load with -dfsan-track-origins=2.
; The expected instrumentation, in order:
;  * read the origin and the shadow of the pointer argument %p from the
;    argument TLS;
;  * call __dfsan_load_label_and_origin and split its i64 result into the label
;    (bits 32 and up) and the origin (low 32 bits);
;  * pass that label/origin pair through __dfsan_chain_origin_if_tainted;
;  * or the loaded label with the pointer's shadow, and select the pointer's
;    origin when the pointer's shadow is non-zero, otherwise the chained one;
;  * store the combined label and the selected origin to the return-value TLS.
define i64 @load64(i64* %p) {
; CHECK-LABEL: @"dfs$load64"
; CHECK-NEXT: %[[#PO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
; CHECK-NEXT: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]]
; CHECK-NEXT: %[[#INTP:]] = bitcast i64* %p to i8*
; CHECK-NEXT: %[[#LABEL_ORIGIN:]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* %[[#INTP]], i64 8)
; CHECK-NEXT: %[[#LABEL_ORIGIN_H32:]] = lshr i64 %[[#LABEL_ORIGIN]], 32
; CHECK-NEXT: %[[#LABEL:]] = trunc i64 %[[#LABEL_ORIGIN_H32]] to i[[#SBITS]]
; CHECK-NEXT: %[[#ORIGIN:]] = trunc i64 %[[#LABEL_ORIGIN]] to i32
; CHECK-NEXT: %[[#ORIGIN_CHAINED:]] = call i32 @__dfsan_chain_origin_if_tainted(i[[#SBITS]] %[[#LABEL]], i32 %[[#ORIGIN]])
; CHECK-NEXT: %[[#LABEL:]] = or i[[#SBITS]] %[[#LABEL]], %[[#PS]]
; CHECK-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
; CHECK-NEXT: %[[#ORIGIN_SEL:]] = select i1 %[[#NZ]], i32 %[[#PO]], i32 %[[#ORIGIN_CHAINED]]
; CHECK-NEXT: %a = load i64, i64* %p
; CHECK-NEXT: store i[[#SBITS]] %[[#LABEL]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
; CHECK-NEXT: store i32 %[[#ORIGIN_SEL]], i32* @__dfsan_retval_origin_tls, align 4
%a = load i64, i64* %p
ret i64 %a
}