[analyzer] Add custom filter functions for GenericTaintChecker

This patch is the last in the series of patches that allow users to
annotate their functions with taint propagation rules.

It implements support for the configured filter functions. These
functions remove taint from the symbols that are passed to the filters
at the specified argument positions.

Differential Revision: https://reviews.llvm.org/D59516
This commit is contained in:
Borsik Gabor 2019-09-08 23:06:37 +02:00
parent 9cd4034c0a
commit 89bc4c662c
5 changed files with 124 additions and 58 deletions

View File

@ -98,13 +98,20 @@ private:
} }
/// Catch taint related bugs. Check if tainted data is passed to a /// Catch taint related bugs. Check if tainted data is passed to a
/// system call etc. /// system call etc. Returns true on matching.
bool checkPre(const CallExpr *CE, CheckerContext &C) const; bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name,
CheckerContext &C) const;
/// Add taint sources on a pre-visit. /// Add taint sources on a pre-visit. Returns true on matching.
void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl,
StringRef Name, CheckerContext &C) const;
/// Propagate taint generated at pre-visit. /// Mark filter's arguments not tainted on a pre-visit. Returns true on
/// matching.
bool addFiltersPre(const CallExpr *CE, StringRef Name,
CheckerContext &C) const;
/// Propagate taint generated at pre-visit. Returns true on matching.
bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
/// Check if the region the expression evaluates to is the standard input, /// Check if the region the expression evaluates to is the standard input,
@ -442,14 +449,26 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
void GenericTaintChecker::checkPreStmt(const CallExpr *CE, void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
CheckerContext &C) const { CheckerContext &C) const {
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
// Check for non-global functions.
if (!FDecl || FDecl->getKind() != Decl::Function)
return;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return;
// Check for taintedness related errors first: system call, uncontrolled // Check for taintedness related errors first: system call, uncontrolled
// format string, tainted buffer size. // format string, tainted buffer size.
if (checkPre(CE, C)) if (checkPre(CE, FDecl, Name, C))
return; return;
// Marks the function's arguments and/or return value tainted if it present in // Marks the function's arguments and/or return value tainted if it present in
// the list. // the list.
addSourcesPre(CE, C); if (addSourcesPre(CE, FDecl, Name, C))
return;
addFiltersPre(CE, Name, C);
} }
void GenericTaintChecker::checkPostStmt(const CallExpr *CE, void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
@ -464,31 +483,46 @@ void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
printTaint(State, Out, NL, Sep); printTaint(State, Out, NL, Sep);
} }
void GenericTaintChecker::addSourcesPre(const CallExpr *CE, bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
const FunctionDecl *FDecl,
StringRef Name,
CheckerContext &C) const { CheckerContext &C) const {
ProgramStateRef State = nullptr;
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || FDecl->getKind() != Decl::Function)
return;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return;
// First, try generating a propagation rule for this function. // First, try generating a propagation rule for this function.
TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
this->CustomPropagations, FDecl, Name, C); this->CustomPropagations, FDecl, Name, C);
if (!Rule.isNull()) { if (!Rule.isNull()) {
State = Rule.process(CE, C); ProgramStateRef State = Rule.process(CE, C);
if (!State) if (State) {
return; C.addTransition(State);
C.addTransition(State); return true;
return; }
}
return false;
}
bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name,
CheckerContext &C) const {
auto It = CustomFilters.find(Name);
if (It == CustomFilters.end())
return false;
ProgramStateRef State = C.getState();
const ArgVector &Args = It->getValue();
for (unsigned ArgNum : Args) {
if (ArgNum >= CE->getNumArgs())
continue;
const Expr *Arg = CE->getArg(ArgNum);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = removeTaint(State, *V);
} }
if (!State) if (State != C.getState()) {
return; C.addTransition(State);
C.addTransition(State); return true;
}
return false;
} }
bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
@ -530,19 +564,12 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
} }
bool GenericTaintChecker::checkPre(const CallExpr *CE, bool GenericTaintChecker::checkPre(const CallExpr *CE,
const FunctionDecl *FDecl, StringRef Name,
CheckerContext &C) const { CheckerContext &C) const {
if (checkUncontrolledFormatString(CE, C)) if (checkUncontrolledFormatString(CE, C))
return true; return true;
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || FDecl->getKind() != Decl::Function)
return false;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return false;
if (checkSystemCall(CE, Name, C)) if (checkSystemCall(CE, Name, C))
return true; return true;

View File

@ -37,9 +37,7 @@ void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
Out << I.first << " : " << I.second << NL; Out << I.first << " : " << I.second << NL;
} }
void dumpTaint(ProgramStateRef State) { void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
printTaint(State, llvm::errs());
}
ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
const LocationContext *LCtx, const LocationContext *LCtx,
@ -64,8 +62,8 @@ ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
// region of the parent region. // region of the parent region.
if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<SVal> binding = if (Optional<SVal> binding =
State->getStateManager().getStoreManager() State->getStateManager().getStoreManager().getDefaultBinding(
.getDefaultBinding(*LCV)) { *LCV)) {
if (SymbolRef Sym = binding->getAsSymbol()) if (SymbolRef Sym = binding->getAsSymbol())
return addPartialTaint(State, Sym, LCV->getRegion(), Kind); return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
} }
@ -94,6 +92,32 @@ ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
return NewState; return NewState;
} }
// Remove taint from the value V. If V yields a symbol, the symbol overload is
// used; otherwise the region obtained from V.getAsRegion() is handed to the
// region overload.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
SymbolRef Sym = V.getAsSymbol();
if (Sym)
return removeTaint(State, Sym);
// No symbol available: fall back to the region behind the value.
const MemRegion *R = V.getAsRegion();
return removeTaint(State, R);
}
// Remove taint from the symbol of a symbolic region. For a null region, or a
// region that is not a SymbolicRegion, the state is returned unchanged.
ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
// dyn_cast_or_null is used so that R == nullptr simply fails the cast.
if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
return removeTaint(State, SR->getSymbol());
return State;
}
// Remove the TaintMap entry of Sym (after stripping any symbol casts) and
// return the resulting state.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
// If this is a symbol cast, remove the cast before removing the taint. Taint
// is cast agnostic.
while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
Sym = SC->getOperand();
ProgramStateRef NewState = State->remove<TaintMap>(Sym);
assert(NewState);
return NewState;
}
ProgramStateRef taint::addPartialTaint(ProgramStateRef State, ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
SymbolRef ParentSym, SymbolRef ParentSym,
const SubRegion *SubRegion, const SubRegion *SubRegion,
@ -157,7 +181,8 @@ bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
// Traverse all the symbols this symbol depends on to see if any are tainted. // Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
SE = Sym->symbol_end(); SI != SE; ++SI) { SE = Sym->symbol_end();
SI != SE; ++SI) {
if (!isa<SymbolData>(*SI)) if (!isa<SymbolData>(*SI))
continue; continue;

View File

@ -27,34 +27,39 @@ using TaintTagType = unsigned;
static constexpr TaintTagType TaintTagGeneric = 0; static constexpr TaintTagType TaintTagGeneric = 0;
/// Create a new state in which the value of the statement is marked as tainted. /// Create a new state in which the value of the statement is marked as tainted.
LLVM_NODISCARD ProgramStateRef LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric); TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the value is marked as tainted. /// Create a new state in which the value is marked as tainted.
LLVM_NODISCARD ProgramStateRef LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
addTaint(ProgramStateRef State, SVal V, TaintTagType Kind = TaintTagGeneric);
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the symbol is marked as tainted. /// Create a new state in which the symbol is marked as tainted.
LLVM_NODISCARD ProgramStateRef LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
addTaint(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind = TaintTagGeneric);
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the pointer represented by the region /// Create a new state in which the pointer represented by the region
/// is marked as tainted. /// is marked as tainted.
LLVM_NODISCARD ProgramStateRef LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
addTaint(ProgramStateRef State, const MemRegion *R, const MemRegion *R,
TaintTagType Kind = TaintTagGeneric); TaintTagType Kind = TaintTagGeneric);
/// Return a state in which the value is no longer marked as tainted.
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V);
/// Return a state in which the pointer represented by the region is no
/// longer marked as tainted.
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
const MemRegion *R);
/// Return a state in which the symbol is no longer marked as tainted.
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State,
SymbolRef Sym);
/// Create a new state in which a sub-region of a given symbol is tainted. /// Create a new state in which a sub-region of a given symbol is tainted.
/// This might be necessary when referring to regions that can not have an /// This might be necessary when referring to regions that can not have an
/// individual symbol, e.g. if they are represented by the default binding of /// individual symbol, e.g. if they are represented by the default binding of
/// a LazyCompoundVal. /// a LazyCompoundVal.
LLVM_NODISCARD ProgramStateRef LLVM_NODISCARD ProgramStateRef addPartialTaint(
addPartialTaint(ProgramStateRef State, ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,
SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind = TaintTagGeneric);
TaintTagType Kind = TaintTagGeneric);
/// Check if the statement has a tainted value in the given state. /// Check if the statement has a tainted value in the given state.
bool isTainted(ProgramStateRef State, const Stmt *S, bool isTainted(ProgramStateRef State, const Stmt *S,
@ -99,4 +104,3 @@ public:
} // namespace clang } // namespace clang
#endif #endif

View File

@ -36,8 +36,8 @@ Propagations:
# A list of filter functions # A list of filter functions
Filters: Filters:
# int x; // x is tainted # int x; // x is tainted
# myFilter(&x); // x is not tainted anymore # isOutOfRange(&x); // x is not tainted anymore
- Name: myFilter - Name: isOutOfRange
Args: [0] Args: [0]
# A list of sink functions # A list of sink functions

View File

@ -56,6 +56,8 @@ extern struct _FILE *stdin;
extern FILE *stdin; extern FILE *stdin;
#endif #endif
#define bool _Bool
int fscanf(FILE *restrict stream, const char *restrict format, ...); int fscanf(FILE *restrict stream, const char *restrict format, ...);
int sprintf(char *str, const char *format, ...); int sprintf(char *str, const char *format, ...);
void setproctitle(const char *fmt, ...); void setproctitle(const char *fmt, ...);
@ -346,6 +348,7 @@ void mySource2(int*);
void myScanf(const char*, ...); void myScanf(const char*, ...);
int myPropagator(int, int*); int myPropagator(int, int*);
int mySnprintf(char*, size_t, const char*, ...); int mySnprintf(char*, size_t, const char*, ...);
bool isOutOfRange(const int*);
void mySink(int, int, int); void mySink(int, int, int);
void testConfigurationSources1() { void testConfigurationSources1() {
@ -372,6 +375,13 @@ void testConfigurationPropagation() {
Buffer[y] = 1; // expected-warning {{Out of bound memory access }} Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
} }
// Exercises the filter configuration: once &x has been passed to
// isOutOfRange, using x as an index into Buffer is expected to produce no
// warning. NOTE(review): presumably mySource1 is a configured taint source,
// so x starts out tainted -- confirm against the test's config file.
void testConfigurationFilter() {
int x = mySource1();
if (isOutOfRange(&x)) // the filter function
return;
Buffer[x] = 1; // no-warning
}
void testConfigurationSinks() { void testConfigurationSinks() {
int x = mySource1(); int x = mySource1();
mySink(x, 1, 2); mySink(x, 1, 2);