[clang] Evaluate strlen of strcpy argument for -Wfortify-source.

Also introduce Expr::tryEvaluateStrLen.

Differential Revision: https://reviews.llvm.org/D104887
This commit is contained in:
Michael Benfield 2021-07-28 20:31:51 +00:00 committed by George Burgess IV
parent 6929bd6d00
commit e12e02df09
6 changed files with 170 additions and 111 deletions

View File

@ -740,6 +740,12 @@ public:
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
unsigned Type) const;
/// If the current Expr is a pointer, this will try to statically
/// determine the strlen of the string pointed to.
/// Returns true if all of the above holds and we were able to figure out the
/// strlen, false otherwise.
bool tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const;
/// Enumeration used to describe the kind of Null pointer constant
/// returned from \c isNullPointerConstant().
enum NullPointerConstantKind {

View File

@ -816,6 +816,11 @@ def warn_fortify_source_size_mismatch : Warning<
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;
def warn_fortify_strlen_overflow: Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but the source string has length %2 (including NUL byte)">,
InGroup<FortifySource>;
def warn_fortify_source_format_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but format string expands to at least %2">,

View File

@ -1826,6 +1826,8 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
EvalInfo &Info);
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
EvalInfo &Info);
/// Evaluate an integer or fixed point expression into an APResult.
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@ -11836,46 +11838,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_wcslen: {
// As an extension, we support __builtin_strlen() as a constant expression,
// and support folding strlen() to a constant.
LValue String;
if (!EvaluatePointer(E->getArg(0), String, Info))
uint64_t StrLen;
if (EvaluateBuiltinStrLen(E->getArg(0), StrLen, Info))
return Success(StrLen, E);
return false;
QualType CharTy = E->getArg(0)->getType()->getPointeeType();
// Fast path: if it's a string literal, search the string value.
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
String.getLValueBase().dyn_cast<const Expr *>())) {
// The string literal may have embedded null characters. Find the first
// one and truncate there.
StringRef Str = S->getBytes();
int64_t Off = String.Offset.getQuantity();
if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
S->getCharByteWidth() == 1 &&
// FIXME: Add fast-path for wchar_t too.
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
Str = Str.substr(Off);
StringRef::size_type Pos = Str.find(0);
if (Pos != StringRef::npos)
Str = Str.substr(0, Pos);
return Success(Str.size(), E);
}
// Fall through to slow path to issue appropriate diagnostic.
}
// Slow path: scan the bytes of the string looking for the terminating 0.
for (uint64_t Strlen = 0; /**/; ++Strlen) {
APValue Char;
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
!Char.isInt())
return false;
if (!Char.getInt())
return Success(Strlen, E);
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
return false;
}
}
case Builtin::BIstrcmp:
@ -15736,3 +15702,58 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
}
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
EvalInfo &Info) {
if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
return false;
LValue String;
if (!EvaluatePointer(E, String, Info))
return false;
QualType CharTy = E->getType()->getPointeeType();
// Fast path: if it's a string literal, search the string value.
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
String.getLValueBase().dyn_cast<const Expr *>())) {
StringRef Str = S->getBytes();
int64_t Off = String.Offset.getQuantity();
if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
S->getCharByteWidth() == 1 &&
// FIXME: Add fast-path for wchar_t too.
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
Str = Str.substr(Off);
StringRef::size_type Pos = Str.find(0);
if (Pos != StringRef::npos)
Str = Str.substr(0, Pos);
Result = Str.size();
return true;
}
// Fall through to slow path.
}
// Slow path: scan the bytes of the string looking for the terminating 0.
for (uint64_t Strlen = 0; /**/; ++Strlen) {
APValue Char;
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
!Char.isInt())
return false;
if (!Char.getInt()) {
Result = Strlen;
return true;
}
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
return false;
}
}
bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
return EvaluateBuiltinStrLen(this, Result, Info);
}

View File

@ -588,14 +588,8 @@ private:
} // namespace
/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
/// __builtin_*_chk function, then use the object size argument specified in the
/// source. Otherwise, infer the object size using __builtin_object_size.
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
// FIXME: There are some more useful checks we could be doing here:
// - Evaluate strlen of strcpy arguments, use as object size.
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
isConstantEvaluated())
return;
@ -607,13 +601,66 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
const TargetInfo &TI = getASTContext().getTargetInfo();
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
auto ComputeExplicitObjectSizeArgument =
[&](unsigned Index) -> Optional<llvm::APSInt> {
Expr::EvalResult Result;
Expr *SizeArg = TheCall->getArg(Index);
if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
return llvm::None;
return Result.Val.getInt();
};
auto ComputeSizeArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
// If the parameter has a pass_object_size attribute, then we should use its
// (potentially) more strict checking mode. Otherwise, conservatively assume
// type 0.
int BOSType = 0;
if (const auto *POS =
FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
BOSType = POS->getType();
const Expr *ObjArg = TheCall->getArg(Index);
uint64_t Result;
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
return llvm::None;
// Get the object size in the target's size_t width.
return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
};
auto ComputeStrLenArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
Expr *ObjArg = TheCall->getArg(Index);
uint64_t Result;
if (!ObjArg->tryEvaluateStrLen(Result, getASTContext()))
return llvm::None;
// Add 1 for null byte.
return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
};
Optional<llvm::APSInt> SourceSize;
Optional<llvm::APSInt> DestinationSize;
unsigned DiagID = 0;
bool IsChkVariant = false;
Optional<llvm::APSInt> UsedSize;
unsigned SizeIndex, ObjectIndex;
switch (BuiltinID) {
default:
return;
case Builtin::BI__builtin_strcpy:
case Builtin::BIstrcpy: {
DiagID = diag::warn_fortify_strlen_overflow;
SourceSize = ComputeStrLenArgument(1);
DestinationSize = ComputeSizeArgument(0);
break;
}
case Builtin::BI__builtin___strcpy_chk: {
DiagID = diag::warn_fortify_strlen_overflow;
SourceSize = ComputeStrLenArgument(1);
DestinationSize = ComputeExplicitObjectSizeArgument(2);
IsChkVariant = true;
break;
}
case Builtin::BIsprintf:
case Builtin::BI__builtin___sprintf_chk: {
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@ -639,14 +686,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
Context.getTargetInfo(), false)) {
DiagID = diag::warn_fortify_source_format_overflow;
UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
SourceSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
.extOrTrunc(SizeTypeWidth);
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
DestinationSize = ComputeExplicitObjectSizeArgument(2);
IsChkVariant = true;
ObjectIndex = 2;
} else {
IsChkVariant = false;
ObjectIndex = 0;
DestinationSize = ComputeSizeArgument(0);
}
break;
}
@ -664,18 +710,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BI__builtin___memccpy_chk:
case Builtin::BI__builtin___mempcpy_chk: {
DiagID = diag::warn_builtin_chk_overflow;
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2);
DestinationSize =
ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
IsChkVariant = true;
SizeIndex = TheCall->getNumArgs() - 2;
ObjectIndex = TheCall->getNumArgs() - 1;
break;
}
case Builtin::BI__builtin___snprintf_chk:
case Builtin::BI__builtin___vsnprintf_chk: {
DiagID = diag::warn_builtin_chk_overflow;
SourceSize = ComputeExplicitObjectSizeArgument(1);
DestinationSize = ComputeExplicitObjectSizeArgument(3);
IsChkVariant = true;
SizeIndex = 1;
ObjectIndex = 3;
break;
}
@ -691,8 +738,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
// size larger than the destination buffer though; this is a runtime abort
// in _FORTIFY_SOURCE mode, and is quite suspicious otherwise.
DiagID = diag::warn_fortify_source_size_mismatch;
SizeIndex = TheCall->getNumArgs() - 1;
ObjectIndex = 0;
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
DestinationSize = ComputeSizeArgument(0);
break;
}
@ -705,8 +752,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BImempcpy:
case Builtin::BI__builtin_mempcpy: {
DiagID = diag::warn_fortify_source_overflow;
SizeIndex = TheCall->getNumArgs() - 1;
ObjectIndex = 0;
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
DestinationSize = ComputeSizeArgument(0);
break;
}
case Builtin::BIsnprintf:
@ -714,50 +761,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BIvsnprintf:
case Builtin::BI__builtin_vsnprintf: {
DiagID = diag::warn_fortify_source_size_mismatch;
SizeIndex = 1;
ObjectIndex = 0;
SourceSize = ComputeExplicitObjectSizeArgument(1);
DestinationSize = ComputeSizeArgument(0);
break;
}
}
llvm::APSInt ObjectSize;
// For __builtin___*_chk, the object size is explicitly provided by the caller
// (usually using __builtin_object_size). Use that value to check this call.
if (IsChkVariant) {
Expr::EvalResult Result;
Expr *SizeArg = TheCall->getArg(ObjectIndex);
if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
return;
ObjectSize = Result.Val.getInt();
// Otherwise, try to evaluate an imaginary call to __builtin_object_size.
} else {
// If the parameter has a pass_object_size attribute, then we should use its
// (potentially) more strict checking mode. Otherwise, conservatively assume
// type 0.
int BOSType = 0;
if (const auto *POS =
FD->getParamDecl(ObjectIndex)->getAttr<PassObjectSizeAttr>())
BOSType = POS->getType();
Expr *ObjArg = TheCall->getArg(ObjectIndex);
uint64_t Result;
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
return;
// Get the object size in the target's size_t width.
ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
}
// Evaluate the number of bytes of the object that this call will use.
if (!UsedSize) {
Expr::EvalResult Result;
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
return;
UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
}
if (UsedSize.getValue().ule(ObjectSize))
if (!SourceSize || !DestinationSize ||
SourceSize.getValue().ule(DestinationSize.getValue()))
return;
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@ -770,10 +781,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
}
SmallString<16> DestinationStr;
SmallString<16> SourceStr;
DestinationSize->toString(DestinationStr, /*Radix=*/10);
SourceSize->toString(SourceStr, /*Radix=*/10);
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
PDiag(DiagID)
<< FunctionName << toString(ObjectSize, /*Radix=*/10)
<< toString(UsedSize.getValue(), /*Radix=*/10));
<< FunctionName << DestinationStr << SourceStr);
}
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,

View File

@ -1,37 +1,37 @@
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter

View File

@ -58,6 +58,19 @@ void call_stpncpy() {
__builtin_stpncpy(s1, s2, 20); // expected-warning {{'stpncpy' size argument is too large; destination buffer has size 10, but size argument is 20}}
}
void call_strcpy() {
const char *const src = "abcd";
char dst[4];
__builtin_strcpy(dst, src); // expected-warning {{'strcpy' will always overflow; destination buffer has size 4, but the source string has length 5 (including NUL byte)}}
}
void call_strcpy_nowarn() {
const char *const src = "abcd";
char dst[5];
// We should not get a warning here.
__builtin_strcpy(dst, src);
}
void call_memmove() {
char s1[10], s2[20];
__builtin_memmove(s2, s1, 20);