[analyzer] Move taint API from ProgramState to a separate header. NFC.

It is now an inter-checker communication API, similar to the one that
connects MallocChecker/CStringChecker/InnerPointerChecker: simply a set of
setters and getters for a state trait.

Differential Revision: https://reviews.llvm.org/D59861

llvm-svn: 357326
This commit is contained in:
Artem Dergachev 2019-03-29 22:49:30 +00:00
parent 60cde76f70
commit 44551cf693
17 changed files with 375 additions and 375 deletions

View File

@ -307,20 +307,6 @@ public:
BugReport &BR) override;
};
/// The bug visitor prints a diagnostic message at the location where a given
/// variable was tainted.
class TaintBugVisitor final : public BugReporterVisitor {
private:
const SVal V;
public:
TaintBugVisitor(const SVal V) : V(V) {}
void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
BugReporterContext &BRC,
BugReport &BR) override;
};
/// The bug visitor will walk all the nodes in a path and collect all the
/// constraints. When it reaches the root node, will create a refutation

View File

@ -20,7 +20,6 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/Support/Allocator.h"
@ -43,7 +42,6 @@ typedef std::unique_ptr<ConstraintManager>(*ConstraintManagerCreator)(
ProgramStateManager &, SubEngine *);
typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)(
ProgramStateManager &);
typedef llvm::ImmutableMap<const SubRegion*, TaintTagType> TaintedSubRegions;
//===----------------------------------------------------------------------===//
// ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState.
@ -367,38 +365,6 @@ public:
template <typename CB> CB
scanReachableSymbols(llvm::iterator_range<region_iterator> Reachable) const;
/// Create a new state in which the statement is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;
/// Create a new state in which the value is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
/// Create a new state in which the symbol is marked as tainted.
LLVM_NODISCARD ProgramStateRef addTaint(SymbolRef S,
TaintTagType Kind = TaintTagGeneric) const;
/// Create a new state in which the region symbol is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(const MemRegion *R, TaintTagType Kind = TaintTagGeneric) const;
/// Create a new state in a which a sub-region of a given symbol is tainted.
/// This might be necessary when referring to regions that can not have an
/// individual symbol, e.g. if they are represented by the default binding of
/// a LazyCompoundVal.
LLVM_NODISCARD ProgramStateRef
addPartialTaint(SymbolRef ParentSym, const SubRegion *SubRegion,
TaintTagType Kind = TaintTagGeneric) const;
/// Check if the statement is tainted in the current state.
bool isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;
bool isTainted(SVal V, TaintTagType Kind = TaintTagGeneric) const;
bool isTainted(SymbolRef Sym, TaintTagType Kind = TaintTagGeneric) const;
bool isTainted(const MemRegion *Reg, TaintTagType Kind=TaintTagGeneric) const;
//==---------------------------------------------------------------------==//
// Accessing the Generic Data Map (GDM).
//==---------------------------------------------------------------------==//
@ -462,10 +428,8 @@ public:
const LocationContext *CurrentLC = nullptr) const;
void printDOT(raw_ostream &Out,
const LocationContext *CurrentLC = nullptr) const;
void printTaint(raw_ostream &Out, const char *nl = "\n") const;
void dump() const;
void dumpTaint() const;
private:
friend void ProgramStateRetain(const ProgramState *state);
@ -499,7 +463,6 @@ private:
std::unique_ptr<ConstraintManager> ConstraintMgr;
ProgramState::GenericDataMap::Factory GDMFactory;
TaintedSubRegions::Factory TSRFactory;
typedef llvm::DenseMap<void*,std::pair<void*,void (*)(void*)> > GDMContextsTy;
GDMContextsTy GDMContexts;

View File

@ -1,58 +0,0 @@
//===- TaintManager.h - Managing taint --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides APIs for adding, removing, querying symbol taint.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h"
#include "llvm/ADT/ImmutableMap.h"
namespace clang {
namespace ento {
/// The GDM component containing the tainted root symbols. We lazily infer the
/// taint of the dependent symbols. Currently, this is a map from a symbol to
/// tag kind. TODO: Should support multiple tag kinds.
// FIXME: This does not use the nice trait macros because it must be accessible
// from multiple translation units.
struct TaintMap {};
using TaintMapImpl = llvm::ImmutableMap<SymbolRef, TaintTagType>;
template<> struct ProgramStateTrait<TaintMap>
: public ProgramStatePartialTrait<TaintMapImpl> {
static void *GDMIndex();
};
/// The GDM component mapping derived symbols' parent symbols to their
/// underlying regions. This is used to efficiently check whether a symbol is
/// tainted when it represents a sub-region of a tainted symbol.
struct DerivedSymTaint {};
using DerivedSymTaintImpl = llvm::ImmutableMap<SymbolRef, TaintedSubRegions>;
template<> struct ProgramStateTrait<DerivedSymTaint>
: public ProgramStatePartialTrait<DerivedSymTaintImpl> {
static void *GDMIndex();
};
class TaintManager {
TaintManager() = default;
};
} // namespace ento
} // namespace clang
#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H

View File

@ -1,29 +0,0 @@
//===- TaintTag.h - Path-sensitive "State" for tracking values --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines a set of taint tags. Several tags are used to differentiate kinds
// of taint.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H
namespace clang {
namespace ento {
/// The type of taint, which helps to differentiate between different types of
/// taint.
using TaintTagType = unsigned;
static const TaintTagType TaintTagGeneric = 0;
} // namespace ento
} // namespace clang
#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H

View File

@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/CharUnits.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@ -24,6 +25,7 @@
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class ArrayBoundCheckerV2 :
@ -204,7 +206,7 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// If we are under constrained and the index variables are tainted, report.
if (state_exceedsUpperBound && state_withinUpperBound) {
SVal ByteOffset = rawOffset.getByteOffset();
if (state->isTainted(ByteOffset)) {
if (isTainted(state, ByteOffset)) {
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
llvm::make_unique<TaintBugVisitor>(ByteOffset));
return;

View File

@ -87,6 +87,7 @@ add_clang_library(clangStaticAnalyzerCheckers
StackAddrEscapeChecker.cpp
StdLibraryFunctionsChecker.cpp
StreamChecker.cpp
Taint.cpp
TaintTesterChecker.cpp
TestAfterDivZeroChecker.cpp
TraversalChecker.cpp

View File

@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
@ -19,6 +20,7 @@
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > {
@ -83,10 +85,10 @@ void DivZeroChecker::checkPreStmt(const BinaryOperator *B,
return;
}
bool TaintedD = C.getState()->isTainted(*DV);
bool TaintedD = isTainted(C.getState(), *DV);
if ((stateNotZero && stateZero && TaintedD)) {
reportBug("Division by a tainted value, possibly zero", stateZero, C,
llvm::make_unique<TaintBugVisitor>(*DV));
llvm::make_unique<taint::TaintBugVisitor>(*DV));
return;
}

View File

@ -13,6 +13,8 @@
// aggressively, even if the involved symbols are under constrained.
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
@ -27,6 +29,7 @@
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class GenericTaintChecker
@ -41,6 +44,9 @@ public:
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
void printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const override;
private:
static const unsigned InvalidArgIndex = UINT_MAX;
/// Denotes the return vale.
@ -152,14 +158,14 @@ private:
static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
CheckerContext &C) {
if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
return true;
if (!E->getType().getTypePtr()->isPointerType())
return false;
Optional<SVal> V = getPointedToSVal(C, E);
return (V && State->isTainted(*V));
return (V && isTainted(State, *V));
}
/// Pre-process a function which propagates taint according to the
@ -313,6 +319,11 @@ void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
propagateFromPre(CE, C);
}
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const {
printTaint(State, Out, NL, Sep);
}
void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = nullptr;
@ -354,7 +365,7 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
for (unsigned ArgNum : TaintArgs) {
// Special handling for the tainted return value.
if (ArgNum == ReturnValueIndex) {
State = State->addTaint(CE, C.getLocationContext());
State = addTaint(State, CE, C.getLocationContext());
continue;
}
@ -365,7 +376,7 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
const Expr *Arg = CE->getArg(ArgNum);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = State->addTaint(*V);
State = addTaint(State, *V);
}
// Clear up the taint info from the state.
@ -570,9 +581,9 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
ProgramStateRef State = C.getState();
Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
SVal TaintedSVal;
if (PointedToSVal && State->isTainted(*PointedToSVal))
if (PointedToSVal && isTainted(State, *PointedToSVal))
TaintedSVal = *PointedToSVal;
else if (State->isTainted(E, C.getLocationContext()))
else if (isTainted(State, E, C.getLocationContext()))
TaintedSVal = C.getSVal(E);
else
return false;

View File

@ -0,0 +1,227 @@
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
using namespace clang;
using namespace ento;
using namespace taint;
// Fully tainted symbols.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
// Partially tainted symbols.
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
TaintTagType);
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
const char *Sep) {
TaintMapTy TM = State->get<TaintMap>();
if (!TM.isEmpty())
Out << "Tainted symbols:" << NL;
for (const auto &I : TM)
Out << I.first << " : " << I.second << NL;
}
void dumpTaint(ProgramStateRef State) {
printTaint(State, llvm::errs());
}
ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
const LocationContext *LCtx,
TaintTagType Kind) {
return addTaint(State, State->getSVal(S, LCtx), Kind);
}
ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
TaintTagType Kind) {
SymbolRef Sym = V.getAsSymbol();
if (Sym)
return addTaint(State, Sym, Kind);
// If the SVal represents a structure, try to mass-taint all values within the
// structure. For now it only works efficiently on lazy compound values that
// were conjured during a conservative evaluation of a function - either as
// return values of functions that return structures or arrays by value, or as
// values of structures or arrays passed into the function by reference,
// directly or through pointer aliasing. Such lazy compound values are
// characterized by having exactly one binding in their captured store within
// their parent region, which is a conjured symbol default-bound to the base
// region of the parent region.
if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<SVal> binding =
State->getStateManager().getStoreManager()
.getDefaultBinding(*LCV)) {
if (SymbolRef Sym = binding->getAsSymbol())
return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
}
}
const MemRegion *R = V.getAsRegion();
return addTaint(State, R, Kind);
}
ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
TaintTagType Kind) {
if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
return addTaint(State, SR->getSymbol(), Kind);
return State;
}
ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
TaintTagType Kind) {
// If this is a symbol cast, remove the cast before adding the taint. Taint
// is cast agnostic.
while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
Sym = SC->getOperand();
ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
assert(NewState);
return NewState;
}
ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
SymbolRef ParentSym,
const SubRegion *SubRegion,
TaintTagType Kind) {
// Ignore partial taint if the entire parent symbol is already tainted.
if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
if (*T == Kind)
return State;
// Partial taint applies if only a portion of the symbol is tainted.
if (SubRegion == SubRegion->getBaseRegion())
return addTaint(State, ParentSym, Kind);
const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
Regs = F.add(Regs, SubRegion, Kind);
ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
assert(NewState);
return NewState;
}
bool taint::isTainted(ProgramStateRef State, const Stmt *S,
const LocationContext *LCtx, TaintTagType Kind) {
SVal val = State->getSVal(S, LCtx);
return isTainted(State, val, Kind);
}
bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
if (const SymExpr *Sym = V.getAsSymExpr())
return isTainted(State, Sym, Kind);
if (const MemRegion *Reg = V.getAsRegion())
return isTainted(State, Reg, Kind);
return false;
}
bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
TaintTagType K) {
if (!Reg)
return false;
// Element region (array element) is tainted if either the base or the offset
// are tainted.
if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
return isTainted(State, ER->getSuperRegion(), K) ||
isTainted(State, ER->getIndex(), K);
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
return isTainted(State, SR->getSymbol(), K);
if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
return isTainted(State, ER->getSuperRegion(), K);
return false;
}
bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
if (!Sym)
return false;
// Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
SE = Sym->symbol_end(); SI != SE; ++SI) {
if (!isa<SymbolData>(*SI))
continue;
if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
if (*Tag == Kind)
return true;
}
if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
// If this is a SymbolDerived with a tainted parent, it's also tainted.
if (isTainted(State, SD->getParentSymbol(), Kind))
return true;
// If this is a SymbolDerived with the same parent symbol as another
// tainted SymbolDerived and a region that's a sub-region of that tainted
// symbol, it's also tainted.
if (const TaintedSubRegions *Regs =
State->get<DerivedSymTaint>(SD->getParentSymbol())) {
const TypedValueRegion *R = SD->getRegion();
for (auto I : *Regs) {
// FIXME: The logic to identify tainted regions could be more
// complete. For example, this would not currently identify
// overlapping fields in a union as tainted. To identify this we can
// check for overlapping/nested byte offsets.
if (Kind == I.second && R->isSubRegionOf(I.first))
return true;
}
}
}
// If memory region is tainted, data is also tainted.
if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
if (isTainted(State, SRV->getRegion(), Kind))
return true;
}
// If this is a SymbolCast from a tainted value, it's also tainted.
if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
if (isTainted(State, SC->getOperand(), Kind))
return true;
}
}
return false;
}
std::shared_ptr<PathDiagnosticPiece>
TaintBugVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC,
BugReport &BR) {
// Find the ExplodedNode where the taint was first introduced
if (!isTainted(N->getState(), V) ||
isTainted(N->getFirstPred()->getState(), V))
return nullptr;
const Stmt *S = PathDiagnosticLocation::getStmt(N);
if (!S)
return nullptr;
const LocationContext *NCtx = N->getLocationContext();
PathDiagnosticLocation L =
PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
if (!L.isValid() || !L.asLocation().isValid())
return nullptr;
return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
}

View File

@ -0,0 +1,102 @@
//=== Taint.h - Taint tracking and basic propagation rules. --------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
namespace clang {
namespace ento {
namespace taint {
/// The type of taint, which helps to differentiate between different types of
/// taint.
using TaintTagType = unsigned;
static constexpr TaintTagType TaintTagGeneric = 0;
/// Create a new state in which the value of the statement is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the value is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(ProgramStateRef State, SVal V,
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the symbol is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(ProgramStateRef State, SymbolRef Sym,
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in which the pointer represented by the region
/// is marked as tainted.
LLVM_NODISCARD ProgramStateRef
addTaint(ProgramStateRef State, const MemRegion *R,
TaintTagType Kind = TaintTagGeneric);
/// Create a new state in a which a sub-region of a given symbol is tainted.
/// This might be necessary when referring to regions that can not have an
/// individual symbol, e.g. if they are represented by the default binding of
/// a LazyCompoundVal.
LLVM_NODISCARD ProgramStateRef
addPartialTaint(ProgramStateRef State,
SymbolRef ParentSym, const SubRegion *SubRegion,
TaintTagType Kind = TaintTagGeneric);
/// Check if the statement has a tainted value in the given state.
bool isTainted(ProgramStateRef State, const Stmt *S,
const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric);
/// Check if the value is tainted in the given state.
bool isTainted(ProgramStateRef State, SVal V,
TaintTagType Kind = TaintTagGeneric);
/// Check if the symbol is tainted in the given state.
bool isTainted(ProgramStateRef State, SymbolRef Sym,
TaintTagType Kind = TaintTagGeneric);
/// Check if the pointer represented by the region is tainted in the given
/// state.
bool isTainted(ProgramStateRef State, const MemRegion *Reg,
TaintTagType Kind = TaintTagGeneric);
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n",
const char *sep = "");
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State);
/// The bug visitor prints a diagnostic message at the location where a given
/// variable was tainted.
class TaintBugVisitor final : public BugReporterVisitor {
private:
const SVal V;
public:
TaintBugVisitor(const SVal V) : V(V) {}
void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
BugReporterContext &BRC,
BugReport &BR) override;
};
} // namespace taint
} // namespace ento
} // namespace clang
#endif

View File

@ -9,6 +9,8 @@
// This checker can be used for testing how taint data is propagated.
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
@ -17,6 +19,7 @@
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class TaintTesterChecker : public Checker< check::PostStmt<Expr> > {
@ -46,7 +49,7 @@ void TaintTesterChecker::checkPostStmt(const Expr *E,
if (!State)
return;
if (State->isTainted(E, C.getLocationContext())) {
if (isTainted(State, E, C.getLocationContext())) {
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
initBugType();
auto report = llvm::make_unique<BugReport>(*BT, "tainted",N);

View File

@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/CharUnits.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@ -25,6 +26,7 @@
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class VLASizeChecker : public Checker< check::PreStmt<DeclStmt> > {
@ -106,7 +108,7 @@ void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
return;
// Check if the size is tainted.
if (state->isTainted(sizeV)) {
if (isTainted(state, sizeV)) {
reportBug(VLA_Tainted, SE, nullptr, C,
llvm::make_unique<TaintBugVisitor>(sizeV));
return;

View File

@ -2412,26 +2412,6 @@ CXXSelfAssignmentBRVisitor::VisitNode(const ExplodedNode *Succ,
return std::move(Piece);
}
std::shared_ptr<PathDiagnosticPiece>
TaintBugVisitor::VisitNode(const ExplodedNode *N,
BugReporterContext &BRC, BugReport &) {
// Find the ExplodedNode where the taint was first introduced
if (!N->getState()->isTainted(V) || N->getFirstPred()->getState()->isTainted(V))
return nullptr;
const Stmt *S = PathDiagnosticLocation::getStmt(N);
if (!S)
return nullptr;
const LocationContext *NCtx = N->getLocationContext();
PathDiagnosticLocation L =
PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
if (!L.isValid() || !L.asLocation().isValid())
return nullptr;
return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
}
FalsePositiveRefutationBRVisitor::FalsePositiveRefutationBRVisitor()
: Constraints(ConstraintRangeTy::Factory().getEmptyMap()) {}

View File

@ -45,7 +45,6 @@ add_clang_library(clangStaticAnalyzerCore
SValBuilder.cpp
SVals.cpp
SymbolManager.cpp
TaintManager.cpp
WorkList.cpp
LINK_LIBS

View File

@ -16,7 +16,6 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
#include "llvm/Support/raw_ostream.h"
@ -458,9 +457,6 @@ void ProgramState::print(raw_ostream &Out,
// Print out the tracked dynamic types.
printDynamicTypeInfo(this, Out, NL, Sep);
// Print out tainted symbols.
printTaint(Out, NL);
// Print checker-specific data.
Mgr.getOwningEngine().printState(Out, this, NL, Sep, LC);
}
@ -474,22 +470,6 @@ LLVM_DUMP_METHOD void ProgramState::dump() const {
print(llvm::errs());
}
void ProgramState::printTaint(raw_ostream &Out,
const char *NL) const {
TaintMapImpl TM = get<TaintMap>();
if (!TM.isEmpty())
Out <<"Tainted symbols:" << NL;
for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) {
Out << I->first << " : " << I->second << NL;
}
}
void ProgramState::dumpTaint() const {
printTaint(llvm::errs());
}
AnalysisManager& ProgramState::getAnalysisManager() const {
return stateMgr->getOwningEngine().getAnalysisManager();
}
@ -657,166 +637,3 @@ bool ProgramState::scanReachableSymbols(
}
return true;
}
ProgramStateRef ProgramState::addTaint(const Stmt *S,
const LocationContext *LCtx,
TaintTagType Kind) const {
if (const Expr *E = dyn_cast_or_null<Expr>(S))
S = E->IgnoreParens();
return addTaint(getSVal(S, LCtx), Kind);
}
ProgramStateRef ProgramState::addTaint(SVal V,
TaintTagType Kind) const {
SymbolRef Sym = V.getAsSymbol();
if (Sym)
return addTaint(Sym, Kind);
// If the SVal represents a structure, try to mass-taint all values within the
// structure. For now it only works efficiently on lazy compound values that
// were conjured during a conservative evaluation of a function - either as
// return values of functions that return structures or arrays by value, or as
// values of structures or arrays passed into the function by reference,
// directly or through pointer aliasing. Such lazy compound values are
// characterized by having exactly one binding in their captured store within
// their parent region, which is a conjured symbol default-bound to the base
// region of the parent region.
if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
if (SymbolRef Sym = binding->getAsSymbol())
return addPartialTaint(Sym, LCV->getRegion(), Kind);
}
}
const MemRegion *R = V.getAsRegion();
return addTaint(R, Kind);
}
ProgramStateRef ProgramState::addTaint(const MemRegion *R,
TaintTagType Kind) const {
if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
return addTaint(SR->getSymbol(), Kind);
return this;
}
ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
TaintTagType Kind) const {
// If this is a symbol cast, remove the cast before adding the taint. Taint
// is cast agnostic.
while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
Sym = SC->getOperand();
ProgramStateRef NewState = set<TaintMap>(Sym, Kind);
assert(NewState);
return NewState;
}
ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
const SubRegion *SubRegion,
TaintTagType Kind) const {
// Ignore partial taint if the entire parent symbol is already tainted.
if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
return this;
// Partial taint applies if only a portion of the symbol is tainted.
if (SubRegion == SubRegion->getBaseRegion())
return addTaint(ParentSym, Kind);
const TaintedSubRegions *SavedRegs = get<DerivedSymTaint>(ParentSym);
TaintedSubRegions Regs =
SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap();
Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind);
ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, Regs);
assert(NewState);
return NewState;
}
bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind) const {
if (const Expr *E = dyn_cast_or_null<Expr>(S))
S = E->IgnoreParens();
SVal val = getSVal(S, LCtx);
return isTainted(val, Kind);
}
bool ProgramState::isTainted(SVal V, TaintTagType Kind) const {
if (const SymExpr *Sym = V.getAsSymExpr())
return isTainted(Sym, Kind);
if (const MemRegion *Reg = V.getAsRegion())
return isTainted(Reg, Kind);
return false;
}
bool ProgramState::isTainted(const MemRegion *Reg, TaintTagType K) const {
if (!Reg)
return false;
// Element region (array element) is tainted if either the base or the offset
// are tainted.
if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
return isTainted(ER->getSuperRegion(), K) || isTainted(ER->getIndex(), K);
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
return isTainted(SR->getSymbol(), K);
if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
return isTainted(ER->getSuperRegion(), K);
return false;
}
bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const {
if (!Sym)
return false;
// Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
SI != SE; ++SI) {
if (!isa<SymbolData>(*SI))
continue;
if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
if (*Tag == Kind)
return true;
}
if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
// If this is a SymbolDerived with a tainted parent, it's also tainted.
if (isTainted(SD->getParentSymbol(), Kind))
return true;
// If this is a SymbolDerived with the same parent symbol as another
// tainted SymbolDerived and a region that's a sub-region of that tainted
// symbol, it's also tainted.
if (const TaintedSubRegions *Regs =
get<DerivedSymTaint>(SD->getParentSymbol())) {
const TypedValueRegion *R = SD->getRegion();
for (auto I : *Regs) {
// FIXME: The logic to identify tainted regions could be more
// complete. For example, this would not currently identify
// overlapping fields in a union as tainted. To identify this we can
// check for overlapping/nested byte offsets.
if (Kind == I.second && R->isSubRegionOf(I.first))
return true;
}
}
}
// If memory region is tainted, data is also tainted.
if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
if (isTainted(SRV->getRegion(), Kind))
return true;
}
// If this is a SymbolCast from a tainted value, it's also tainted.
if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
if (isTainted(SC->getOperand(), Kind))
return true;
}
}
return false;
}

View File

@ -1,22 +0,0 @@
//== TaintManager.cpp ------------------------------------------ -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h"
using namespace clang;
using namespace ento;
void *ProgramStateTrait<TaintMap>::GDMIndex() {
static int index = 0;
return &index;
}
void *ProgramStateTrait<DerivedSymTaint>::GDMIndex() {
static int index;
return &index;
}

View File

@ -0,0 +1,14 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint\
// RUN: -analyzer-checker=debug.ExprInspection %s\
// RUN: 2>&1 | FileCheck %s
void clang_analyzer_printState();
int getchar();
// CHECK: Tainted symbols:
// CHECK-NEXT: conj_$2{{.*}} : 0
int test_taint_dumps() {
int x = getchar();
clang_analyzer_printState();
return x;
}