[analyzer] Improve pointer arithmetic checker.

This patch is intended to improve pointer arithmetic checker.
From now on it only warns when the pointer arithmetic is likely to cause an
error. For example when the pointer points to a single object, or an array of
derived types.

Differential Revision: http://reviews.llvm.org/D14203

llvm-svn: 261632
This commit is contained in:
Gabor Horvath 2016-02-23 12:34:39 +00:00
parent 7326c01aaa
commit d1abcf799e
6 changed files with 387 additions and 35 deletions

View File

@ -13,55 +13,329 @@
//===----------------------------------------------------------------------===//
#include "ClangSACheckers.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ExprCXX.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;
using namespace ento;
namespace {
enum class AllocKind {
SingleObject,
Array,
Unknown,
Reinterpreted // Single object interpreted as an array.
};
} // end namespace
namespace llvm {
template <> struct FoldingSetTrait<AllocKind> {
static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
ID.AddInteger(static_cast<int>(X));
}
};
} // end namespace llvm
namespace {
class PointerArithChecker
: public Checker< check::PreStmt<BinaryOperator> > {
mutable std::unique_ptr<BuiltinBug> BT;
: public Checker<
check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
check::PostStmt<CallExpr>, check::DeadSymbols> {
AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
AllocKind &AKind, CheckerContext &C) const;
const MemRegion *getPointedRegion(const MemRegion *Region,
CheckerContext &C) const;
void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
bool PointedNeeded = false) const;
void initAllocIdentifiers(ASTContext &C) const;
mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
mutable std::unique_ptr<BuiltinBug> BT_polyArray;
mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
public:
void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
};
} // end namespace
REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
CheckerContext &C) const {
// TODO: intentional leak. Some information is garbage collected too early,
// see http://reviews.llvm.org/D14203 for further information.
/*ProgramStateRef State = C.getState();
RegionStateTy RegionStates = State->get<RegionState>();
for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end();
I != E; ++I) {
if (!SR.isLiveRegion(I->first))
State = State->remove<RegionState>(I->first);
}
C.addTransition(State);*/
}
void PointerArithChecker::checkPreStmt(const BinaryOperator *B,
CheckerContext &C) const {
if (B->getOpcode() != BO_Sub && B->getOpcode() != BO_Add)
AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
const FunctionDecl *FD) const {
// This checker try not to assume anything about placement and overloaded
// new to avoid false positives.
if (isa<CXXMethodDecl>(FD))
return AllocKind::Unknown;
if (FD->getNumParams() != 1 || FD->isVariadic())
return AllocKind::Unknown;
if (NE->isArray())
return AllocKind::Array;
return AllocKind::SingleObject;
}
const MemRegion *
PointerArithChecker::getPointedRegion(const MemRegion *Region,
CheckerContext &C) const {
assert(Region);
ProgramStateRef State = C.getState();
SVal S = State->getSVal(Region);
return S.getAsRegion();
}
/// Checks whether a region is the part of an array.
/// In case there is a dericed to base cast above the array element, the
/// Polymorphic output value is set to true. AKind output value is set to the
/// allocation kind of the inspected region.
const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
bool &Polymorphic,
AllocKind &AKind,
CheckerContext &C) const {
assert(Region);
while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) {
Region = Region->getAs<CXXBaseObjectRegion>()->getSuperRegion();
Polymorphic = true;
}
if (Region->getKind() == MemRegion::Kind::ElementRegionKind) {
Region = Region->getAs<ElementRegion>()->getSuperRegion();
}
ProgramStateRef State = C.getState();
if (const AllocKind *Kind = State->get<RegionState>(Region)) {
AKind = *Kind;
if (*Kind == AllocKind::Array)
return Region;
else
return nullptr;
}
// When the region is symbolic and we do not have any information about it,
// assume that this is an array to avoid false positives.
if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
return Region;
// No AllocKind stored and not symbolic, assume that it points to a single
// object.
return nullptr;
}
void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
CheckerContext &C,
bool PointedNeeded) const {
SourceRange SR = E->getSourceRange();
if (SR.isInvalid())
return;
ProgramStateRef state = C.getState();
const LocationContext *LCtx = C.getLocationContext();
SVal LV = state->getSVal(B->getLHS(), LCtx);
SVal RV = state->getSVal(B->getRHS(), LCtx);
const MemRegion *LR = LV.getAsRegion();
if (!LR || !RV.isConstant())
ProgramStateRef State = C.getState();
const MemRegion *Region =
State->getSVal(E, C.getLocationContext()).getAsRegion();
if (!Region)
return;
if (PointedNeeded)
Region = getPointedRegion(Region, C);
if (!Region)
return;
// If pointer arithmetic is done on variables of non-array type, this often
// means behavior rely on memory organization, which is dangerous.
if (isa<VarRegion>(LR) || isa<CodeTextRegion>(LR) ||
isa<CompoundLiteralRegion>(LR)) {
bool IsPolymorphic = false;
AllocKind Kind = AllocKind::Unknown;
if (const MemRegion *ArrayRegion =
getArrayRegion(Region, IsPolymorphic, Kind, C)) {
if (!IsPolymorphic)
return;
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
if (!BT)
BT.reset(
new BuiltinBug(this, "Dangerous pointer arithmetic",
"Pointer arithmetic done on non-array variables "
"means reliance on memory layout, which is "
"dangerous."));
auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
R->addRange(B->getSourceRange());
if (!BT_polyArray)
BT_polyArray.reset(new BuiltinBug(
this, "Dangerous pointer arithmetic",
"Pointer arithmetic on a pointer to base class is dangerous "
"because derived and base class may have different size."));
auto R = llvm::make_unique<BugReport>(*BT_polyArray,
BT_polyArray->getDescription(), N);
R->addRange(E->getSourceRange());
R->markInteresting(ArrayRegion);
C.emitReport(std::move(R));
}
return;
}
if (Kind == AllocKind::Reinterpreted)
return;
// We might not have enough information about symbolic regions.
if (Kind != AllocKind::SingleObject &&
Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
return;
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
if (!BT_pointerArith)
BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
"Pointer arithmetic on non-array "
"variables relies on memory layout, "
"which is dangerous."));
auto R = llvm::make_unique<BugReport>(*BT_pointerArith,
BT_pointerArith->getDescription(), N);
R->addRange(SR);
R->markInteresting(Region);
C.emitReport(std::move(R));
}
}
void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
if (!AllocFunctions.empty())
return;
AllocFunctions.insert(&C.Idents.get("alloca"));
AllocFunctions.insert(&C.Idents.get("malloc"));
AllocFunctions.insert(&C.Idents.get("realloc"));
AllocFunctions.insert(&C.Idents.get("calloc"));
AllocFunctions.insert(&C.Idents.get("valloc"));
}
void PointerArithChecker::checkPostStmt(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
const FunctionDecl *FD = C.getCalleeDecl(CE);
if (!FD)
return;
IdentifierInfo *FunI = FD->getIdentifier();
initAllocIdentifiers(C.getASTContext());
if (AllocFunctions.count(FunI) == 0)
return;
SVal SV = State->getSVal(CE, C.getLocationContext());
const MemRegion *Region = SV.getAsRegion();
if (!Region)
return;
// Assume that C allocation functions allocate arrays to avoid false
// positives.
// TODO: Add heuristics to distinguish alloc calls that allocates single
// objecs.
State = State->set<RegionState>(Region, AllocKind::Array);
C.addTransition(State);
}
void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
CheckerContext &C) const {
const FunctionDecl *FD = NE->getOperatorNew();
if (!FD)
return;
AllocKind Kind = getKindOfNewOp(NE, FD);
ProgramStateRef State = C.getState();
SVal AllocedVal = State->getSVal(NE, C.getLocationContext());
const MemRegion *Region = AllocedVal.getAsRegion();
if (!Region)
return;
State = State->set<RegionState>(Region, Kind);
C.addTransition(State);
}
void PointerArithChecker::checkPostStmt(const CastExpr *CE,
CheckerContext &C) const {
if (CE->getCastKind() != CastKind::CK_BitCast)
return;
const Expr *CastedExpr = CE->getSubExpr();
ProgramStateRef State = C.getState();
SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
const MemRegion *Region = CastedVal.getAsRegion();
if (!Region)
return;
// Suppress reinterpret casted hits.
State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
C.addTransition(State);
}
void PointerArithChecker::checkPreStmt(const CastExpr *CE,
CheckerContext &C) const {
if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
return;
const Expr *CastedExpr = CE->getSubExpr();
ProgramStateRef State = C.getState();
SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
const MemRegion *Region = CastedVal.getAsRegion();
if (!Region)
return;
if (const AllocKind *Kind = State->get<RegionState>(Region)) {
if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
return;
}
State = State->set<RegionState>(Region, AllocKind::Array);
C.addTransition(State);
}
void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
CheckerContext &C) const {
if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
return;
reportPointerArithMisuse(UOp->getSubExpr(), C, true);
}
void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext());
// Indexing with 0 is OK.
if (Idx.isZeroConstant())
return;
reportPointerArithMisuse(SubsExpr->getBase(), C);
}
void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
CheckerContext &C) const {
BinaryOperatorKind OpKind = BOp->getOpcode();
if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
return;
const Expr *Lhs = BOp->getLHS();
const Expr *Rhs = BOp->getRHS();
ProgramStateRef State = C.getState();
if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
SVal RHSVal = State->getSVal(Rhs, C.getLocationContext());
if (State->isNull(RHSVal).isConstrainedTrue())
return;
reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
}
// The int += ptr; case is not valid C++.
if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
SVal LHSVal = State->getSVal(Lhs, C.getLocationContext());
if (State->isNull(LHSVal).isConstrainedTrue())
return;
reportPointerArithMisuse(Rhs, C);
}
}

View File

@ -12,7 +12,7 @@ int a;
typedef int *vcreate_t(int *, DATA_TYPE, int, int);
void fn1(unsigned, unsigned) {
char b = 0;
for (; 1; a++, &b + a * 0) // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
for (; 1; a++, &b + a * 0)
;
}
@ -55,7 +55,7 @@ void fn1_1(void *p1, const void *p2) { p1 != p2; }
void fn2_1(uint32_t *p1, unsigned char *p2, uint32_t p3) {
unsigned i = 0;
for (0; i < p3; i++)
fn1_1(p1 + i, p2 + i * 0); // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
fn1_1(p1 + i, p2 + i * 0);
}
struct A_1 {

View File

@ -16,7 +16,7 @@ struct s {
void f() {
struct s a;
int *p = &(a.n) + 1;
int *p = &(a.n) + 1; // expected-warning{{Pointer arithmetic on}}
}
typedef struct {

View File

@ -52,7 +52,7 @@ void f4() {
void f5() {
int x, y;
int *p;
p = &x + 1; // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
p = &x + 1; // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}}
int a[10];
p = a + 1; // no-warning
@ -75,7 +75,7 @@ start:
clang_analyzer_eval(&a != 0); // expected-warning{{TRUE}}
clang_analyzer_eval(&a >= 0); // expected-warning{{TRUE}}
clang_analyzer_eval(&a > 0); // expected-warning{{TRUE}}
clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}} expected-warning{{Pointer arithmetic done on non-array variables}}
clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}}
// LHS is NULL, RHS is non-symbolic
// The same code is used for labels and non-symbolic values.

View File

@ -1,5 +1,4 @@
// RUN: %clang_cc1 -analyze -analyzer-checker=core,debug.ExprInspection -verify %s
// expected-no-diagnostics
// RUN: %clang_cc1 -Wno-unused-value -std=c++14 -analyze -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s
struct X {
int *p;
int zero;
@ -20,3 +19,82 @@ int test (int *in) {
return 5/littleX.zero; // no-warning
}
class Base {};
class Derived : public Base {};
void checkPolymorphicUse() {
Derived d[10];
Base *p = d;
++p; // expected-warning{{Pointer arithmetic on a pointer to base class is dangerous}}
}
void checkBitCasts() {
long l;
char *p = (char*)&l;
p = p+2;
}
void checkBasicarithmetic(int i) {
int t[10];
int *p = t;
++p;
int a = 5;
p = &a;
++p; // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}}
p = p + 2; // expected-warning{{}}
p = 2 + p; // expected-warning{{}}
p += 2; // expected-warning{{}}
a += p[2]; // expected-warning{{}}
p = i*0 + p;
p = p + i*0;
p += i*0;
}
void checkArithOnSymbolic(int*p) {
++p;
p = p + 2;
p = 2 + p;
p += 2;
(void)p[2];
}
struct S {
int t[10];
};
void arrayInStruct() {
S s;
int * p = s.t;
++p;
S *sp = new S;
p = sp->t;
++p;
delete sp;
}
void checkNew() {
int *p = new int;
p[1] = 1; // expected-warning{{}}
}
void InitState(int* state) {
state[1] = 1; // expected-warning{{}}
}
int* getArray(int size) {
if (size == 0)
return new int;
return new int[5];
}
void checkConditionalArray() {
int* maybeArray = getArray(0);
InitState(maybeArray);
}
void checkMultiDimansionalArray() {
int a[5][5];
*(*(a+1)+2) = 2;
}

View File

@ -1,4 +1,4 @@
// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core %s -analyzer-store=region -verify
// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core -analyzer-disable-checker=alpha.core.PointerArithm %s -analyzer-store=region -verify
// expected-no-diagnostics
typedef int bar_return_t;