Model the effects of strcpy() and stpcpy() in CStringChecker. Other changes:

- Fix memcpy() and friends to actually invalidate the destination buffer.
- Emit a different message for out-of-bounds buffer accesses if the buffer is being written to.
- When conjuring symbols, let ValueManager figure out the type.

llvm-svn: 111120
This commit is contained in:
Jordy Rose 2010-08-16 07:51:42 +00:00
parent ebab1ed5d3
commit 722f558f07
3 changed files with 320 additions and 25 deletions

View File

@ -22,10 +22,11 @@ using namespace clang;
namespace {
class CStringChecker : public CheckerVisitor<CStringChecker> {
BugType *BT_Null, *BT_Bounds, *BT_Overlap, *BT_NotCString;
BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString;
public:
CStringChecker()
: BT_Null(0), BT_Bounds(0), BT_Overlap(0), BT_NotCString(0) {}
: BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0)
{}
static void *getTag() { static int tag; return &tag; }
bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);
@ -52,15 +53,24 @@ public:
void EvalStrlen(CheckerContext &C, const CallExpr *CE);
void EvalStrcpy(CheckerContext &C, const CallExpr *CE);
void EvalStpcpy(CheckerContext &C, const CallExpr *CE);
void EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd);
// Utility methods
std::pair<const GRState*, const GRState*>
AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
const GRState *SetCStringLength(const GRState *state, const MemRegion *MR,
SVal StrLen);
SVal GetCStringLengthForRegion(CheckerContext &C, const GRState *&state,
const Expr *Ex, const MemRegion *MR);
SVal GetCStringLength(CheckerContext &C, const GRState *&state,
const Expr *Ex, SVal Buf);
const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state,
const Expr *Ex, SVal V);
bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
const MemRegion *MR);
@ -68,11 +78,13 @@ public:
const GRState *CheckNonNull(CheckerContext &C, const GRState *state,
const Expr *S, SVal l);
const GRState *CheckLocation(CheckerContext &C, const GRState *state,
const Expr *S, SVal l);
const Expr *S, SVal l,
bool IsDestination = false);
const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
const Expr *Size,
const Expr *FirstBuf,
const Expr *SecondBuf = NULL);
const Expr *SecondBuf = NULL,
bool FirstIsDestination = false);
const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
const Expr *Size, const Expr *First,
const Expr *Second);
@ -156,7 +168,8 @@ const GRState *CStringChecker::CheckNonNull(CheckerContext &C,
// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
const GRState *CStringChecker::CheckLocation(CheckerContext &C,
const GRState *state,
const Expr *S, SVal l) {
const Expr *S, SVal l,
bool IsDestination) {
// If a previous check has failed, propagate the failure.
if (!state)
return NULL;
@ -189,17 +202,26 @@ const GRState *CStringChecker::CheckLocation(CheckerContext &C,
if (!N)
return NULL;
if (!BT_Bounds)
BT_Bounds = new BuiltinBug("Out-of-bound array access",
"Byte string function accesses out-of-bound array element "
"(buffer overflow)");
BuiltinBug *BT;
if (IsDestination) {
if (!BT_BoundsWrite) {
BT_BoundsWrite = new BuiltinBug("Out-of-bound array access",
"Byte string function overflows destination buffer");
}
BT = static_cast<BuiltinBug*>(BT_BoundsWrite);
} else {
if (!BT_Bounds) {
BT_Bounds = new BuiltinBug("Out-of-bound array access",
"Byte string function accesses out-of-bound array element");
}
BT = static_cast<BuiltinBug*>(BT_Bounds);
}
// FIXME: It would be nice to eventually make this diagnostic more clear,
// e.g., by referencing the original declaration or by saying *why* this
// reference is outside the range.
// Generate a report for this bug.
BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds);
RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
report->addRange(S->getSourceRange());
@ -216,7 +238,8 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
const GRState *state,
const Expr *Size,
const Expr *FirstBuf,
const Expr *SecondBuf) {
const Expr *SecondBuf,
bool FirstIsDestination) {
// If a previous check has failed, propagate the failure.
if (!state)
return NULL;
@ -250,7 +273,7 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
SVal BufEnd = SV.EvalBinOpLN(state, BinaryOperator::Add, *BufLoc,
LastOffset, PtrTy);
state = CheckLocation(C, state, FirstBuf, BufEnd);
state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
// If the buffer isn't large enough, abort.
if (!state)
@ -407,6 +430,42 @@ void CStringChecker::EmitOverlapBug(CheckerContext &C, const GRState *state,
C.EmitReport(report);
}
const GRState *CStringChecker::SetCStringLength(const GRState *state,
const MemRegion *MR,
SVal StrLen) {
assert(!StrLen.isUndef() && "Attempt to set an undefined string length");
if (StrLen.isUnknown())
return state;
MR = MR->StripCasts();
switch (MR->getKind()) {
case MemRegion::StringRegionKind:
// FIXME: This can happen if we strcpy() into a string region. This is
// undefined [C99 6.4.5p6], but we should still warn about it.
return state;
case MemRegion::SymbolicRegionKind:
case MemRegion::AllocaRegionKind:
case MemRegion::VarRegionKind:
case MemRegion::FieldRegionKind:
case MemRegion::ObjCIvarRegionKind:
return state->set<CStringLength>(MR, StrLen);
case MemRegion::ElementRegionKind:
// FIXME: Handle element regions by upper-bounding the parent region's
// string length.
return state;
default:
// Other regions (mostly non-data) can't have a reliable C string length.
// For now, just ignore the change.
// FIXME: These are rare but not impossible. We should output some kind of
// warning for things like strcpy((char[]){'a', 0}, "b");
return state;
}
}
SVal CStringChecker::GetCStringLengthForRegion(CheckerContext &C,
const GRState *&state,
const Expr *Ex,
@ -517,6 +576,37 @@ SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *&state,
}
}
const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
const GRState *state,
const Expr *E, SVal V) {
Loc *L = dyn_cast<Loc>(&V);
if (!L)
return state;
// FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
// some assumptions about the value that CFRefCount can't. Even so, it should
// probably be refactored.
if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
const MemRegion *R = MR->getRegion()->StripCasts();
// Are we dealing with an ElementRegion? If so, we should be invalidating
// the super-region.
if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
R = ER->getSuperRegion();
// FIXME: What about layers of ElementRegions?
}
// Invalidate this region.
unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
return state->InvalidateRegion(R, E, Count, NULL);
}
// If we have a non-region value by chance, just remove the binding.
// FIXME: is this necessary or correct? This handles the non-Region
// cases. Is it ever valid to store to these?
return state->unbindLoc(*L);
}
bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
const MemRegion *MR) {
const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
@ -577,11 +667,20 @@ void CStringChecker::EvalCopyCommon(CheckerContext &C, const GRState *state,
// If the size can be nonzero, we have to check the other arguments.
if (StNonZeroSize) {
state = StNonZeroSize;
state = CheckBufferAccess(C, state, Size, Dest, Source);
state = CheckBufferAccess(C, state, Size, Dest, Source,
/* FirstIsDst = */ true);
if (Restricted)
state = CheckOverlap(C, state, Size, Dest, Source);
if (state)
if (state) {
// Invalidate the destination.
// FIXME: Even if we can't perfectly model the copy, we should see if we
// can use LazyCompoundVals to copy the source values into the destination.
// This would probably remove any existing bindings past the end of the
// copied region, but that's still an improvement over blank invalidation.
state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
C.addTransition(state);
}
}
}
@ -668,7 +767,7 @@ void CStringChecker::EvalMemcmp(CheckerContext &C, const CallExpr *CE) {
if (state) {
// The return value is the comparison result, which we don't know.
unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
state = state->BindExpr(CE, CmpV);
C.addTransition(state);
}
@ -698,7 +797,7 @@ void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) {
if (StrLen.isUnknown()) {
ValueManager &ValMgr = C.getValueManager();
unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
}
// Bind the return value.
@ -707,6 +806,90 @@ void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) {
}
}
void CStringChecker::EvalStrcpy(CheckerContext &C, const CallExpr *CE) {
// char *strcpy(char *restrict dst, const char *restrict src);
EvalStrcpyCommon(C, CE, /* ReturnEnd = */ false);
}
void CStringChecker::EvalStpcpy(CheckerContext &C, const CallExpr *CE) {
// char *stpcpy(char *restrict dst, const char *restrict src);
EvalStrcpyCommon(C, CE, /* ReturnEnd = */ true);
}
void CStringChecker::EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
bool ReturnEnd) {
const GRState *state = C.getState();
// Check that the destination is non-null
const Expr *Dst = CE->getArg(0);
SVal DstVal = state->getSVal(Dst);
state = CheckNonNull(C, state, Dst, DstVal);
if (!state)
return;
// Check that the source is non-null.
const Expr *Src = CE->getArg(1);
SVal SrcVal = state->getSVal(Src);
state = CheckNonNull(C, state, Src, SrcVal);
if (!state)
return;
// Get the string length of the source.
SVal StrLen = GetCStringLength(C, state, Src, SrcVal);
// If the source isn't a valid C string, give up.
if (StrLen.isUndef())
return;
SVal Result = (ReturnEnd ? UnknownVal() : DstVal);
// If the destination is a MemRegion, try to check for a buffer overflow and
// record the new string length.
if (loc::MemRegionVal *DstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
// If the length is known, we can check for an overflow.
if (NonLoc *KnownStrLen = dyn_cast<NonLoc>(&StrLen)) {
SValuator &SV = C.getSValuator();
SVal LastElement = SV.EvalBinOpLN(state, BinaryOperator::Add,
*DstRegVal, *KnownStrLen,
Dst->getType());
state = CheckLocation(C, state, Dst, LastElement, /* IsDst = */ true);
if (!state)
return;
// If this is a stpcpy-style copy, the last element is the return value.
if (ReturnEnd)
Result = LastElement;
}
// Invalidate the destination. This must happen before we set the C string
// length because invalidation will clear the length.
// FIXME: Even if we can't perfectly model the copy, we should see if we
// can use LazyCompoundVals to copy the source values into the destination.
// This would probably remove any existing bindings past the end of the
// string, but that's still an improvement over blank invalidation.
state = InvalidateBuffer(C, state, Dst, *DstRegVal);
// Set the C string length of the destination.
state = SetCStringLength(state, DstRegVal->getRegion(), StrLen);
}
// If this is a stpcpy-style copy, but we were unable to check for a buffer
// overflow, we still need a result. Conjure a return value.
if (ReturnEnd && Result.isUnknown()) {
ValueManager &ValMgr = C.getValueManager();
unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
}
// Set the return value.
state = state->BindExpr(CE, Result);
C.addTransition(state);
}
//===----------------------------------------------------------------------===//
// The driver method, and other Checker callbacks.
//===----------------------------------------------------------------------===//
@ -730,6 +913,8 @@ bool CStringChecker::EvalCallExpr(CheckerContext &C, const CallExpr *CE) {
.Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy)
.Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp)
.Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove)
.Cases("strcpy", "__strcpy_chk", &CStringChecker::EvalStrcpy)
.Cases("stpcpy", "__stpcpy_chk", &CStringChecker::EvalStpcpy)
.Case("strlen", &CStringChecker::EvalStrlen)
.Case("bcopy", &CStringChecker::EvalBcopy)
.Default(NULL);

View File

@ -48,27 +48,30 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
void memcpy0 () {
char src[] = {1, 2, 3, 4};
char dst[4];
char dst[4] = {0};
memcpy(dst, src, 4); // no-warning
if (memcpy(dst, src, 4) != dst) {
(void)*(char*)0; // no-warning
}
if (dst[0] != 0)
(void)*(char*)0; // expected-warning{{null}}
}
void memcpy1 () {
char src[] = {1, 2, 3, 4};
char dst[10];
memcpy(dst, src, 5); // expected-warning{{out-of-bound}}
memcpy(dst, src, 5); // expected-warning{{Byte string function accesses out-of-bound array element}}
}
void memcpy2 () {
char src[] = {1, 2, 3, 4};
char dst[1];
memcpy(dst, src, 4); // expected-warning{{out-of-bound}}
memcpy(dst, src, 4); // expected-warning{{Byte string function overflows destination buffer}}
}
void memcpy3 () {
@ -82,14 +85,14 @@ void memcpy4 () {
char src[] = {1, 2, 3, 4};
char dst[10];
memcpy(dst+2, src+2, 3); // expected-warning{{out-of-bound}}
memcpy(dst+2, src+2, 3); // expected-warning{{Byte string function accesses out-of-bound array element}}
}
void memcpy5() {
char src[] = {1, 2, 3, 4};
char dst[3];
memcpy(dst+2, src+2, 2); // expected-warning{{out-of-bound}}
memcpy(dst+2, src+2, 2); // expected-warning{{Byte string function overflows destination buffer}}
}
void memcpy6() {
@ -150,13 +153,16 @@ void *memmove(void *s1, const void *s2, size_t n);
void memmove0 () {
char src[] = {1, 2, 3, 4};
char dst[4];
char dst[4] = {0};
memmove(dst, src, 4); // no-warning
if (memmove(dst, src, 4) != dst) {
(void)*(char*)0; // no-warning
}
if (dst[0] != 0)
(void)*(char*)0; // expected-warning{{null}}
}
void memmove1 () {
@ -170,7 +176,7 @@ void memmove2 () {
char src[] = {1, 2, 3, 4};
char dst[1];
memmove(dst, src, 4); // expected-warning{{out-of-bound}}
memmove(dst, src, 4); // expected-warning{{overflow}}
}
//===----------------------------------------------------------------------===
@ -263,9 +269,12 @@ void bcopy(/*const*/ void *s1, void *s2, size_t n);
void bcopy0 () {
char src[] = {1, 2, 3, 4};
char dst[4];
char dst[4] = {0};
bcopy(src, dst, 4); // no-warning
if (dst[0] != 0)
(void)*(char*)0; // expected-warning{{null}}
}
void bcopy1 () {
@ -279,5 +288,5 @@ void bcopy2 () {
char src[] = {1, 2, 3, 4};
char dst[1];
bcopy(src, dst, 4); // expected-warning{{out-of-bound}}
bcopy(src, dst, 4); // expected-warning{{overflow}}
}

View File

@ -24,6 +24,7 @@
# define BUILTIN(f) f
#endif /* USE_BUILTINS */
#define NULL 0
typedef typeof(sizeof(int)) size_t;
//===----------------------------------------------------------------------===
@ -137,3 +138,103 @@ void strlen_liveness(const char *x) {
if (strlen(x) < 5)
(void)*(char*)0; // no-warning
}
//===----------------------------------------------------------------------===
// strcpy()
//===----------------------------------------------------------------------===
#ifdef VARIANT
#define __strcpy_chk BUILTIN(__strcpy_chk)
char *__strcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen);
#define strcpy(a,b) __strcpy_chk(a,b,(size_t)-1)
#else /* VARIANT */
#define strcpy BUILTIN(strcpy)
char *strcpy(char *restrict s1, const char *restrict s2);
#endif /* VARIANT */
void strcpy_null_dst(char *x) {
strcpy(NULL, x); // expected-warning{{Null pointer argument in call to byte string function}}
}
void strcpy_null_src(char *x) {
strcpy(x, NULL); // expected-warning{{Null pointer argument in call to byte string function}}
}
void strcpy_fn(char *x) {
strcpy(x, (char*)&strcpy_fn); // expected-warning{{Argument to byte string function is the address of the function 'strcpy_fn', which is not a null-terminated string}}
}
void strcpy_effects(char *x, char *y) {
char a = x[0];
if (strcpy(x, y) != x)
(void)*(char*)0; // no-warning
if (strlen(x) != strlen(y))
(void)*(char*)0; // no-warning
if (a != x[0])
(void)*(char*)0; // expected-warning{{null}}
}
void strcpy_overflow(char *y) {
char x[4];
if (strlen(y) == 4)
strcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}}
}
void strcpy_no_overflow(char *y) {
char x[4];
if (strlen(y) == 3)
strcpy(x, y); // no-warning
}
//===----------------------------------------------------------------------===
// stpcpy()
//===----------------------------------------------------------------------===
#ifdef VARIANT
#define __stpcpy_chk BUILTIN(__stpcpy_chk)
char *__stpcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen);
#define stpcpy(a,b) __stpcpy_chk(a,b,(size_t)-1)
#else /* VARIANT */
#define stpcpy BUILTIN(stpcpy)
char *stpcpy(char *restrict s1, const char *restrict s2);
#endif /* VARIANT */
void stpcpy_effect(char *x, char *y) {
char a = x[0];
if (stpcpy(x, y) != &x[strlen(y)])
(void)*(char*)0; // no-warning
if (strlen(x) != strlen(y))
(void)*(char*)0; // no-warning
if (a != x[0])
(void)*(char*)0; // expected-warning{{null}}
}
void stpcpy_overflow(char *y) {
char x[4];
if (strlen(y) == 4)
stpcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}}
}
void stpcpy_no_overflow(char *y) {
char x[4];
if (strlen(y) == 3)
stpcpy(x, y); // no-warning
}