Groundwork for C string length tracking. Currently only handles the length of constant string literals, which is not too helpful, and only calls to strlen() are checked.

llvm-svn: 109480
This commit is contained in:
Jordy Rose 2010-07-27 01:37:31 +00:00
parent 5c67986156
commit b052e8f436
2 changed files with 254 additions and 2 deletions

View File

@ -21,10 +21,10 @@ using namespace clang;
namespace {
class CStringChecker : public CheckerVisitor<CStringChecker> {
BugType *BT_Null, *BT_Bounds, *BT_Overlap;
BugType *BT_Null, *BT_Bounds, *BT_Overlap, *BT_NotCString;
public:
CStringChecker()
: BT_Null(0), BT_Bounds(0), BT_Overlap(0) {}
: BT_Null(0), BT_Bounds(0), BT_Overlap(0), BT_NotCString(0) {}
static void *getTag() { static int tag; return &tag; }
bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);
@ -40,10 +40,19 @@ public:
void EvalMemcmp(CheckerContext &C, const CallExpr *CE);
void EvalStrlen(CheckerContext &C, const CallExpr *CE);
// Utility methods
std::pair<const GRState*, const GRState*>
AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
SVal GetCStringLength(CheckerContext &C, const GRState *state,
const Expr *Ex, SVal Buf);
bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
const MemRegion *MR);
// Re-usable checks
const GRState *CheckNonNull(CheckerContext &C, const GRState *state,
const Expr *S, SVal l);
const GRState *CheckLocation(CheckerContext &C, const GRState *state,
@ -369,6 +378,162 @@ void CStringChecker::EmitOverlapBug(CheckerContext &C, const GRState *state,
C.EmitReport(report);
}
// Computes a value representing the C string length of the buffer 'Buf'.
//
//   C     - checker context, used to generate sink nodes, emit reports and
//           conjure symbols.
//   state - state from which a sink node is generated when an error is found.
//   Ex    - expression the buffer value came from; it is highlighted in any
//           report and is the expression the conjured symbol is tied to.
//   Buf   - the value of the buffer argument.
//
// Returns:
//   - a concrete size_t value when Buf is a constant string literal,
//   - UndefinedVal when Buf is known not to be a C string (a label address or
//     a region kind that is not handled below); a "not a null-terminated
//     string" report is emitted first if a sink node could be generated,
//   - otherwise a freshly conjured size_t symbol.
// Callers must be prepared to handle the UndefinedVal case.
SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *state,
                                      const Expr *Ex, SVal Buf) {
  const MemRegion *MR = Buf.getAsRegion();
  if (!MR) {
    // If we can't get a region, see if it's something we /know/ isn't a
    // C string. In the context of locations, the only time we can issue such
    // a warning is for labels.
    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
      ExplodedNode *N = C.GenerateSink(state);
      if (N) {
        // The bug type is created lazily, on the first report.
        if (!BT_NotCString)
          BT_NotCString = new BuiltinBug("API",
            "Argument is not a null-terminated string.");

        llvm::SmallString<120> buf;
        llvm::raw_svector_ostream os(buf);
        os << "Argument to byte string function is the address of the label '"
           << Label->getLabel()->getID()->getName()
           << "', which is not a null-terminated string";

        // Generate a report for this bug.
        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
                                                          os.str(), N);

        report->addRange(Ex->getSourceRange());
        C.EmitReport(report);
      }

      return UndefinedVal();
    }

    // If it's not a region and not a label, it may be a constant location,
    // or it may be unknown. Just conjure a value as usual (see end of method).
  } else {
    // If we have a region, strip casts from it and see if we can figure out
    // its length. For anything we can't figure out, just conjure a value as
    // usual (see end of method).
    MR = MR->StripCasts();

    switch (MR->getKind()) {
    case MemRegion::StringRegionKind: {
      ValueManager &ValMgr = C.getValueManager();
      ASTContext &Ctx = ValMgr.getContext();
      const StringLiteral *Str = cast<StringRegion>(MR)->getStringLiteral();

      // Non-constant string literals may have been changed, so only return a
      // known value if we know the literal is constant.
      if (Str->getType().isConstant(Ctx)) {
        QualType SizeTy = Ctx.getSizeType();

        // A C string ends at its first NUL byte, and a literal may contain
        // embedded NULs (e.g. "123\x00567"). The C string length is therefore
        // the offset of the first NUL, or the full byte length of the literal
        // when it has no embedded NUL.
        llvm::StringRef Bytes = Str->getString();
        size_t FirstNul = Bytes.find('\0');
        size_t Length =
          (FirstNul == llvm::StringRef::npos) ? Bytes.size() : FirstNul;
        return ValMgr.makeIntVal(Length, SizeTy);
      }

      // FIXME: Handle the non-constant case. For now, just treat it like any
      // other initialized region.
      // FALL-THROUGH
    }
    case MemRegion::SymbolicRegionKind:
    case MemRegion::AllocaRegionKind:
    case MemRegion::VarRegionKind:
    case MemRegion::FieldRegionKind:
    case MemRegion::ObjCIvarRegionKind:
      // FIXME: These need to be tracked!
      break;
    case MemRegion::CompoundLiteralRegionKind:
      // FIXME: Can we track this? Is it necessary?
      break;
    case MemRegion::ElementRegionKind:
      // FIXME: How can we handle this? It's not good enough to subtract the
      // offset from the base string length; consider "123\x00567" and &a[5].
      break;
    default: {
      // Other regions (mostly non-data) can't have a reliable C string length.
      // In this case, an error is emitted and UndefinedVal is returned.
      // The caller should always be prepared to handle this case.
      ExplodedNode *N = C.GenerateSink(state);
      if (N) {
        // The bug type is created lazily, on the first report.
        if (!BT_NotCString)
          BT_NotCString = new BuiltinBug("API",
            "Argument is not a null-terminated string.");

        llvm::SmallString<120> buf;
        llvm::raw_svector_ostream os(buf);

        os << "Argument to byte string function is ";

        // Use a description of the region when one is available; otherwise
        // fall back to the generic wording.
        if (SummarizeRegion(os, C.getASTContext(), MR)) {
          os << ", which is not a null-terminated string";
        } else {
          os << "not a null-terminated string";
        }

        // Generate a report for this bug.
        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
                                                          os.str(), N);

        report->addRange(Ex->getSourceRange());
        C.EmitReport(report);
      }

      return UndefinedVal();
    }
    }
  }

  // If we can't track a certain region's C string length, or if we can't get a
  // region from the SVal, conjure a value, for use in later constraints.
  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();

  ValueManager &ValMgr = C.getValueManager();
  QualType SizeTy = ValMgr.getContext().getSizeType();
  return ValMgr.getConjuredSymbolVal(getTag(), Ex, SizeTy, Count);
}
// Writes a short human-readable description of the region 'MR' to 'os', for
// use inside a diagnostic message.
//
//   os  - stream the description is appended to.
//   Ctx - AST context, used to obtain the value type of typed regions.
//   MR  - the region to describe.
//
// Returns true if a description was written. Returns false, writing nothing,
// if MR is not a TypedRegion or is of a kind that is not handled here.
bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
                                     const MemRegion *MR) {
  const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
  if (!TR)
    return false;

  switch (TR->getKind()) {
  case MemRegion::FunctionTextRegionKind: {
    const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
    if (FD)
      os << "the address of the function '" << FD << "'";
    else
      os << "the address of a function";
    return true;
  }
  case MemRegion::BlockTextRegionKind:
    os << "block text";
    return true;
  case MemRegion::BlockDataRegionKind:
    os << "a block";
    return true;
  case MemRegion::CXXThisRegionKind:
  case MemRegion::CXXObjectRegionKind:
    os << "a C++ object of type "
       << TR->getValueType(Ctx).getAsString();
    return true;
  case MemRegion::VarRegionKind:
    // The trailing space keeps the type name from being glued onto "type".
    os << "a variable of type "
       << TR->getValueType(Ctx).getAsString();
    return true;
  case MemRegion::FieldRegionKind:
    os << "a field of type "
       << TR->getValueType(Ctx).getAsString();
    return true;
  case MemRegion::ObjCIvarRegionKind:
    os << "an instance variable of type "
       << TR->getValueType(Ctx).getAsString();
    return true;
  default:
    return false;
  }
}
//===----------------------------------------------------------------------===//
// Evaluation of individual function calls.
//===----------------------------------------------------------------------===//
@ -489,6 +654,27 @@ void CStringChecker::EvalMemcmp(CheckerContext &C, const CallExpr *CE) {
}
}
// Models a call to: size_t strlen(const char *s);
//
// The call expression is bound to the computed C string length of the first
// argument. No transition is added when the non-null check yields no state or
// when the length comes back undefined.
void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) {
  const GRState *St = C.getState();
  const Expr *StrArg = CE->getArg(0);
  SVal StrVal = St->getSVal(StrArg);

  // The argument must not be null; stop here if the check leaves no state.
  St = CheckNonNull(C, St, StrArg, StrVal);
  if (!St)
    return;

  // Determine the length. An undefined result means the argument was not a
  // C string (or anything similar to one), so there is no return value to set.
  SVal Len = GetCStringLength(C, St, StrArg, StrVal);
  if (Len.isUndef())
    return;

  // The argument is valid: the length is the value of the call.
  St = St->BindExpr(CE, Len);
  C.addTransition(St);
}
//===----------------------------------------------------------------------===//
// The driver method.
//===----------------------------------------------------------------------===//
@ -512,6 +698,7 @@ bool CStringChecker::EvalCallExpr(CheckerContext &C, const CallExpr *CE) {
.Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy)
.Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp)
.Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove)
.Case("strlen", &CStringChecker::EvalStrlen)
.Case("bcopy", &CStringChecker::EvalBcopy)
.Default(NULL);

View File

@ -0,0 +1,65 @@
// RUN: %clang_cc1 -analyze -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
// RUN: %clang_cc1 -analyze -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
//===----------------------------------------------------------------------===
// Declarations
//===----------------------------------------------------------------------===
// Some functions are so similar to each other that they follow the same code
// path, such as memcpy and __memcpy_chk, or memcmp and bcmp. If VARIANT is
// defined, make sure to use the variants instead to make sure they are still
// checked by the analyzer.
// Some functions are implemented as builtins. These should be #defined as
// BUILTIN(f), which will prepend "__builtin_" if USE_BUILTINS is defined.
// Functions that have variants and are also available as builtins should be
// declared carefully! See memcpy() for an example.
#ifdef USE_BUILTINS
# define BUILTIN(f) __builtin_ ## f
#else /* USE_BUILTINS */
# define BUILTIN(f) f
#endif /* USE_BUILTINS */
// Define size_t as the type produced by a sizeof expression (presumably so
// that the test does not have to include a system header).
typedef typeof(sizeof(int)) size_t;
//===----------------------------------------------------------------------===
// strlen()
//===----------------------------------------------------------------------===
// Route every use of strlen through BUILTIN(), so that the USE_BUILTINS runs
// call __builtin_strlen instead of strlen.
#define strlen BUILTIN(strlen)
size_t strlen(const char *s);
// strlen() applied directly to a string literal: the length is known to be 3,
// so the body of the 'if' is expected to be flagged as dead code.
void strlen_constant0() {
if (strlen("123") != 3)
(void)*(char*)0; // expected-warning{{never executed}}
}
// Same as strlen_constant0, but the literal is reached through a pointer
// variable; the body of the 'if' is still expected to be flagged as dead code.
void strlen_constant1() {
const char *a = "123";
if (strlen(a) != 3)
(void)*(char*)0; // expected-warning{{never executed}}
}
// The string lives in a local array whose first byte is overwritten with an
// unknown character, so its length is not assumed to be 3: the 'if' body is
// treated as reachable and the null dereference in it is expected to be
// reported.
void strlen_constant2(char x) {
char a[] = "123";
a[0] = x;
if (strlen(a) != 3)
(void)*(char*)0; // expected-warning{{null}}
}
// Passing a null pointer to strlen() is expected to be diagnosed.
size_t strlen_null() {
return strlen(0); // expected-warning{{Null pointer argument in call to byte string function}}
}
// Passing the address of a function (cast to char*) to strlen() is expected
// to be diagnosed as not being a null-terminated string.
size_t strlen_fn() {
return strlen((char*)&strlen_fn); // expected-warning{{Argument to byte string function is the address of the function 'strlen_fn', which is not a null-terminated string}}
}
// Passing the address of a label (cast to char*) to strlen() is expected to
// be diagnosed as not being a null-terminated string.
size_t strlen_nonloc() {
label:
return strlen((char*)&&label); // expected-warning{{Argument to byte string function is the address of the label 'label', which is not a null-terminated string}}
}