[clang] Fortify warning for scanf calls with field width too big.

Differential Revision: https://reviews.llvm.org/D111833
This commit is contained in:
Michael Benfield 2021-10-14 20:02:28 +00:00
parent e3bfb6a146
commit 2db66f8d48
3 changed files with 206 additions and 12 deletions

View File

@ -833,6 +833,10 @@ def warn_fortify_source_format_overflow : Warning<
" but format string expands to at least %2">, " but format string expands to at least %2">,
InGroup<FortifySource>; InGroup<FortifySource>;
def warn_fortify_scanf_overflow : Warning<
"'%0' may overflow; destination buffer in argument %1 has size "
"%2, but the corresponding specifier may require size %3">,
InGroup<FortifySource>;
/// main() /// main()
// static main() is not an error in C, just in C++. // static main() is not an error in C, just in C++.

View File

@ -408,6 +408,64 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
namespace { namespace {
class ScanfDiagnosticFormatHandler
: public analyze_format_string::FormatStringHandler {
// Accepts the argument index (relative to the first destination index) of the
// argument whose size we want.
using ComputeSizeFunction =
llvm::function_ref<Optional<llvm::APSInt>(unsigned)>;
// Accepts the argument index (relative to the first destination index), the
// destination size, and the source size).
using DiagnoseFunction =
llvm::function_ref<void(unsigned, unsigned, unsigned)>;
ComputeSizeFunction ComputeSizeArgument;
DiagnoseFunction Diagnose;
public:
ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument,
DiagnoseFunction Diagnose)
: ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {}
bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
const char *StartSpecifier,
unsigned specifierLen) override {
if (!FS.consumesDataArgument())
return true;
unsigned NulByte = 0;
switch ((FS.getConversionSpecifier().getKind())) {
default:
return true;
case analyze_format_string::ConversionSpecifier::sArg:
case analyze_format_string::ConversionSpecifier::ScanListArg:
NulByte = 1;
break;
case analyze_format_string::ConversionSpecifier::cArg:
break;
}
auto OptionalFW = FS.getFieldWidth();
if (OptionalFW.getHowSpecified() !=
analyze_format_string::OptionalAmount::HowSpecified::Constant)
return true;
unsigned SourceSize = OptionalFW.getConstantAmount() + NulByte;
auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex());
if (!DestSizeAPS)
return true;
unsigned DestSize = DestSizeAPS->getZExtValue();
if (DestSize < SourceSize)
Diagnose(FS.getArgIndex(), DestSize, SourceSize);
return true;
}
};
class EstimateSizeFormatHandler class EstimateSizeFormatHandler
: public analyze_format_string::FormatStringHandler { : public analyze_format_string::FormatStringHandler {
size_t Size; size_t Size;
@ -615,9 +673,12 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
// (potentially) more strict checking mode. Otherwise, conservatively assume // (potentially) more strict checking mode. Otherwise, conservatively assume
// type 0. // type 0.
int BOSType = 0; int BOSType = 0;
if (const auto *POS = // This check can fail for variadic functions.
FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>()) if (Index < FD->getNumParams()) {
BOSType = POS->getType(); if (const auto *POS =
FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
BOSType = POS->getType();
}
const Expr *ObjArg = TheCall->getArg(Index); const Expr *ObjArg = TheCall->getArg(Index);
uint64_t Result; uint64_t Result;
@ -642,6 +703,20 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
unsigned DiagID = 0; unsigned DiagID = 0;
bool IsChkVariant = false; bool IsChkVariant = false;
auto GetFunctionName = [&]() {
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
// Skim off the details of whichever builtin was called to produce a better
// diagnostic, as it's unlikely that the user wrote the __builtin
// explicitly.
if (IsChkVariant) {
FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
FunctionName = FunctionName.drop_back(std::strlen("_chk"));
} else if (FunctionName.startswith("__builtin_")) {
FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
}
return FunctionName;
};
switch (BuiltinID) { switch (BuiltinID) {
default: default:
return; return;
@ -661,6 +736,61 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
break; break;
} }
case Builtin::BIscanf:
case Builtin::BIfscanf:
case Builtin::BIsscanf: {
unsigned FormatIndex = 1;
unsigned DataIndex = 2;
if (BuiltinID == Builtin::BIscanf) {
FormatIndex = 0;
DataIndex = 1;
}
const auto *FormatExpr =
TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
const auto *Format = dyn_cast<StringLiteral>(FormatExpr);
if (!Format)
return;
if (!Format->isAscii() && !Format->isUTF8())
return;
auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize,
unsigned SourceSize) {
DiagID = diag::warn_fortify_scanf_overflow;
unsigned Index = ArgIndex + DataIndex;
StringRef FunctionName = GetFunctionName();
DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall,
PDiag(DiagID) << FunctionName << (Index + 1)
<< DestSize << SourceSize);
};
StringRef FormatStrRef = Format->getString();
auto ShiftedComputeSizeArgument = [&](unsigned Index) {
return ComputeSizeArgument(Index + DataIndex);
};
ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose);
const char *FormatBytes = FormatStrRef.data();
const ConstantArrayType *T =
Context.getAsConstantArrayType(Format->getType());
assert(T && "String literal not of constant array type!");
size_t TypeSize = T->getSize().getZExtValue();
// In case there's a null byte somewhere.
size_t StrLen =
std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
analyze_format_string::ParseScanfString(H, FormatBytes,
FormatBytes + StrLen, getLangOpts(),
Context.getTargetInfo());
// Unlike the other cases, in this one we have already issued the diagnostic
// here, so no need to continue (because unlike the other cases, here the
// diagnostic refers to the argument number).
return;
}
case Builtin::BIsprintf: case Builtin::BIsprintf:
case Builtin::BI__builtin___sprintf_chk: { case Builtin::BI__builtin___sprintf_chk: {
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@ -771,15 +901,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
SourceSize.getValue().ule(DestinationSize.getValue())) SourceSize.getValue().ule(DestinationSize.getValue()))
return; return;
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); StringRef FunctionName = GetFunctionName();
// Skim off the details of whichever builtin was called to produce a better
// diagnostic, as it's unlikely that the user wrote the __builtin explicitly.
if (IsChkVariant) {
FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
FunctionName = FunctionName.drop_back(std::strlen("_chk"));
} else if (FunctionName.startswith("__builtin_")) {
FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
}
SmallString<16> DestinationStr; SmallString<16> DestinationStr;
SmallString<16> SourceStr; SmallString<16> SourceStr;

View File

@ -0,0 +1,68 @@
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify
typedef struct _FILE FILE;
extern int scanf(const char *format, ...);
extern int fscanf(FILE *f, const char *format, ...);
extern int sscanf(const char *input, const char *format, ...);
void call_scanf() {
char buf10[10];
char buf20[20];
char buf30[30];
scanf("%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}}
scanf("%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 12}}
scanf("%4s %5s %9s", buf20, buf30, buf10);
scanf("%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}}
scanf("%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 22}}
scanf("%19s %5s %9s", buf20, buf30, buf10);
scanf("%19s %29s %9s", buf20, buf30, buf10);
scanf("%*21s %*30s %10s", buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 10, but the corresponding specifier may require size 11}}
scanf("%*21s %5s", buf10);
scanf("%10s %*30s", buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 10, but the corresponding specifier may require size 11}}
scanf("%9s %*30s", buf10);
scanf("%4[a] %5[a] %10[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}}
scanf("%4[a] %5[a] %11[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 12}}
scanf("%4[a] %5[a] %9[a]", buf20, buf30, buf10);
scanf("%20[a] %5[a] %9[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}}
scanf("%21[a] %5[a] %9[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 22}}
scanf("%19[a] %5[a] %9[a]", buf20, buf30, buf10);
scanf("%19[a] %29[a] %9[a]", buf20, buf30, buf10);
scanf("%4c %5c %10c", buf20, buf30, buf10);
scanf("%4c %5c %11c", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}}
scanf("%4c %5c %9c", buf20, buf30, buf10);
scanf("%20c %5c %9c", buf20, buf30, buf10);
scanf("%21c %5c %9c", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}}
// Don't warn for other specifiers.
int x;
scanf("%12d", &x);
}
void call_sscanf() {
char buf10[10];
char buf20[20];
char buf30[30];
sscanf("a b c", "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 11}}
sscanf("a b c", "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 12}}
sscanf("a b c", "%4s %5s %9s", buf20, buf30, buf10);
sscanf("a b c", "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 21}}
sscanf("a b c", "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 22}}
sscanf("a b c", "%19s %5s %9s", buf20, buf30, buf10);
sscanf("a b c", "%19s %29s %9s", buf20, buf30, buf10);
}
void call_fscanf() {
char buf10[10];
char buf20[20];
char buf30[30];
fscanf(0, "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 11}}
fscanf(0, "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 12}}
fscanf(0, "%4s %5s %9s", buf20, buf30, buf10);
fscanf(0, "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 21}}
fscanf(0, "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 22}}
fscanf(0, "%19s %5s %9s", buf20, buf30, buf10);
fscanf(0, "%19s %29s %9s", buf20, buf30, buf10);
}